From 7c6d02c5c0d5b66c7af27f44d2689c6c49c7f8d6 Mon Sep 17 00:00:00 2001 From: Breakthrough Date: Tue, 21 Apr 2026 21:54:54 -0400 Subject: [PATCH 1/3] [cli] Finalize `save-xml` for release Tested FCPX and FCP7 formats on DaVinci Resolve. --- scenedetect.cfg | 14 ++++ scenedetect/_cli/__init__.py | 4 +- scenedetect/_cli/commands.py | 157 ++++++++++++++++++++++------------- tests/test_cli.py | 109 ++++++++++++++++++++++++ tests/test_vfr.py | 26 ++++++ website/pages/changelog.md | 2 +- 6 files changed, 253 insertions(+), 59 deletions(-) diff --git a/scenedetect.cfg b/scenedetect.cfg index f0901cc0..729c1074 100644 --- a/scenedetect.cfg +++ b/scenedetect.cfg @@ -345,6 +345,20 @@ #disable-shift = no +[save-xml] + +# Filename format of XML file. Can use $VIDEO_NAME macro. +#filename = $VIDEO_NAME.xml + +# Format of the XML file. Must be one of: +# - fcpx: Final Cut Pro X (FCPXML, default) +# - fcp: Final Cut Pro 7 (xmeml) +#format = fcpx + +# Folder to output XML file to. Overrides [global] output option. 
+#output = /usr/tmp/images + + # # BACKEND OPTIONS # diff --git a/scenedetect/_cli/__init__.py b/scenedetect/_cli/__init__.py index 88dc9654..ab169e16 100644 --- a/scenedetect/_cli/__init__.py +++ b/scenedetect/_cli/__init__.py @@ -1614,10 +1614,10 @@ def save_qp_command( ctx.add_command(cli_commands.save_qp, save_qp_args) -SAVE_XML_HELP = """[IN DEVELOPMENT] Save cuts in XML format.""" +SAVE_XML_HELP = """Save cuts in Final Cut Pro XML format (FCP7 xmeml or FCPX).""" -@click.command("save-xml", cls=Command, help=SAVE_XML_HELP, hidden=True) +@click.command("save-xml", cls=Command, help=SAVE_XML_HELP) @click.option( "--filename", "-f", diff --git a/scenedetect/_cli/commands.py b/scenedetect/_cli/commands.py index 0003f30e..d6f673f8 100644 --- a/scenedetect/_cli/commands.py +++ b/scenedetect/_cli/commands.py @@ -20,7 +20,7 @@ import os.path import typing as ty import webbrowser -from datetime import datetime +from fractions import Fraction from pathlib import Path from string import Template from xml.dom import minidom @@ -311,68 +311,89 @@ def get_edl_timecode(timecode: FrameTimecode): f.write("\n") +def _rational_seconds(value: Fraction) -> str: + """Format a `Fraction` as an FCPXML rational time string. + + FCPXML expresses time as `/s` (or `s` for whole seconds). + See https://developer.apple.com/documentation/professional-video-applications/fcpxml-reference + """ + if value.denominator == 1: + return f"{value.numerator}s" + return f"{value.numerator}/{value.denominator}s" + + +def _frame_timecode_seconds(tc: FrameTimecode) -> Fraction: + """Exact seconds for `tc` as a `Fraction`, derived from PTS × time base.""" + return Fraction(tc.pts) * tc.time_base + + def _save_xml_fcpx( context: CliContext, scenes: SceneList, filename: str, output: str, ): - """Saves scenes in Final Cut Pro X XML format.""" - ASSET_ID = "asset1" - FORMAT_ID = "format1" - # TODO: Need to handle other video formats! 
- VIDEO_FORMAT_TODO_HANDLE_OTHERS = "FFVideoFormat1080p24" + """Saves scenes in Final Cut Pro X XML format (FCPXML 1.9). + + The output follows Apple's FCPXML schema with rational-second time values and + a custom `` derived from the source video's frame rate and resolution. + See https://developer.apple.com/documentation/professional-video-applications/fcpxml-reference + """ + ASSET_ID = "r2" + FORMAT_ID = "r1" + + frame_rate = context.video_stream.frame_rate + frame_duration = _rational_seconds(Fraction(frame_rate.denominator, frame_rate.numerator)) + width, height = context.video_stream.frame_size + video_name = context.video_stream.name + src_uri = Path(context.video_stream.path).absolute().as_uri() + total_duration = _rational_seconds(_frame_timecode_seconds(scenes[-1][1] - scenes[0][0])) root = ElementTree.Element("fcpxml", version="1.9") resources = ElementTree.SubElement(root, "resources") - ElementTree.SubElement(resources, "format", id="format1", name=VIDEO_FORMAT_TODO_HANDLE_OTHERS) - - video_name = context.video_stream.name - - # TODO: We should calculate duration from the scene list. - duration = context.video_stream.duration - duration = str(duration.seconds) + "s" # TODO: Is float okay here? - path = Path(context.video_stream.path).absolute() + # `name` is cosmetic: Apple publishes no authoritative FFVideoFormat* list, and editors key + # off frameDuration/width/height. We emit a generated name for display only. + format_name = f"FFVideoFormat{height}p{round(float(frame_rate) * 100):04d}" ElementTree.SubElement( + resources, + "format", + id=FORMAT_ID, + name=format_name, + frameDuration=frame_duration, + width=str(width), + height=str(height), + ) + asset = ElementTree.SubElement( resources, "asset", id=ASSET_ID, name=video_name, - src=str(path), - duration=duration, + start="0s", + duration=total_duration, hasVideo="1", - hasAudio="1", # TODO: Handle case of no audio. 
format=FORMAT_ID, ) + ElementTree.SubElement(asset, "media-rep", kind="original-media", src=src_uri) library = ElementTree.SubElement(root, "library") - now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - event = ElementTree.SubElement(library, "event", name=f"Shot Detection {now}") - project = ElementTree.SubElement( - event, "project", name=video_name - ) # TODO: Allow customizing project name. - sequence = ElementTree.SubElement(project, "sequence", format=FORMAT_ID, duration=duration) + event = ElementTree.SubElement(library, "event", name=video_name) + project = ElementTree.SubElement(event, "project", name=video_name) + sequence = ElementTree.SubElement( + project, "sequence", format=FORMAT_ID, duration=total_duration, tcStart="0s", tcFormat="NDF" + ) spine = ElementTree.SubElement(sequence, "spine") for i, (start, end) in enumerate(scenes): - start_seconds = start.seconds - duration_seconds = (end - start).seconds - clip = ElementTree.SubElement( - spine, - "clip", - name=f"Shot {i + 1}", - duration=f"{duration_seconds:.3f}s", - start=f"{start_seconds:.3f}s", - offset=f"{start_seconds:.3f}s", - ) + scene_start = _rational_seconds(_frame_timecode_seconds(start)) + scene_duration = _rational_seconds(_frame_timecode_seconds(end - start)) ElementTree.SubElement( - clip, + spine, "asset-clip", - ref=ASSET_ID, - duration=f"{duration_seconds:.3f}s", - start=f"{start_seconds:.3f}s", - offset="0s", name=f"Shot {i + 1}", + ref=ASSET_ID, + offset=scene_start, + start=scene_start, + duration=scene_duration, ) pretty_xml = minidom.parseString(ElementTree.tostring(root, encoding="unicode")).toprettyxml( @@ -393,7 +414,11 @@ def _save_xml_fcp( filename: str, output: str, ): - """Saves scenes in Final Cut Pro 7 XML format.""" + """Saves scenes in Final Cut Pro 7 XML (xmeml) format. + + See https://developer.apple.com/library/archive/documentation/AppleApplications/Reference/FinalCutPro_XML/ + for the element reference. 
`pathurl` must be a valid `file://` URI per the xmeml spec. + """ assert scenes root = ElementTree.Element("xmeml", version="5") project = ElementTree.SubElement(root, "project") @@ -417,39 +442,59 @@ def _save_xml_fcp( ElementTree.SubElement(timecode, "frame").text = "0" ElementTree.SubElement(timecode, "displayformat").text = "NDF" + width, height = context.video_stream.frame_size media = ElementTree.SubElement(sequence, "media") video = ElementTree.SubElement(media, "video") format = ElementTree.SubElement(video, "format") - ElementTree.SubElement(format, "samplecharacteristics") + sample_chars = ElementTree.SubElement(format, "samplecharacteristics") + ElementTree.SubElement(sample_chars, "width").text = str(width) + ElementTree.SubElement(sample_chars, "height").text = str(height) track = ElementTree.SubElement(video, "track") - # Add clips for each shot boundary + path_uri = Path(context.video_stream.path).absolute().as_uri() + # Source media total duration in frames at the declared timebase. Required on `` so NLEs + # (DaVinci Resolve, Premiere) can seek into the source — without it the clip plays frozen. + source_duration_frames = ( + str(round(context.video_stream.duration.seconds * fps)) + if context.video_stream.duration is not None + else str(round(scenes[-1][1].seconds * fps)) + ) + FILE_ID = "file1" + for i, (start, end) in enumerate(scenes): clip = ElementTree.SubElement(track, "clipitem") ElementTree.SubElement(clip, "name").text = f"Shot {i + 1}" ElementTree.SubElement(clip, "enabled").text = "TRUE" - ElementTree.SubElement(clip, "rate").append( - ElementTree.fromstring(f"{round(fps)}") - ) + ElementTree.SubElement(clip, "duration").text = source_duration_frames + clip_rate = ElementTree.SubElement(clip, "rate") + ElementTree.SubElement(clip_rate, "timebase").text = str(round(fps)) + ElementTree.SubElement(clip_rate, "ntsc").text = ntsc # Frame numbers relative to the declared fps, computed from PTS seconds. 
ElementTree.SubElement(clip, "start").text = str(round(start.seconds * fps)) ElementTree.SubElement(clip, "end").text = str(round(end.seconds * fps)) ElementTree.SubElement(clip, "in").text = str(round(start.seconds * fps)) ElementTree.SubElement(clip, "out").text = str(round(end.seconds * fps)) - file_ref = ElementTree.SubElement(clip, "file", id=f"file{i + 1}") - ElementTree.SubElement(file_ref, "name").text = context.video_stream.name - path = Path(context.video_stream.path).absolute() - # TODO: Can we just use path.as_uri() here? - # On Windows this should be: file://localhost/C:/Users/... according to the samples provided - # from https://github.com/Breakthrough/PySceneDetect/issues/156#issuecomment-1076213412. - ElementTree.SubElement(file_ref, "pathurl").text = f"file://{path}" - - media_ref = ElementTree.SubElement(file_ref, "media") - video_ref = ElementTree.SubElement(media_ref, "video") - ElementTree.SubElement(video_ref, "samplecharacteristics") + # xmeml allows a single full `<file id="...">` declaration reused via `<file id="..."/>` on + # subsequent clipitems. Emit full details on the first, then self-close on the rest. 
+ if i == 0: + file_ref = ElementTree.SubElement(clip, "file", id=FILE_ID) + ElementTree.SubElement(file_ref, "name").text = context.video_stream.name + ElementTree.SubElement(file_ref, "pathurl").text = path_uri + ElementTree.SubElement(file_ref, "duration").text = source_duration_frames + file_rate = ElementTree.SubElement(file_ref, "rate") + ElementTree.SubElement(file_rate, "timebase").text = str(round(fps)) + ElementTree.SubElement(file_rate, "ntsc").text = ntsc + media_ref = ElementTree.SubElement(file_ref, "media") + video_ref = ElementTree.SubElement(media_ref, "video") + clip_chars = ElementTree.SubElement(video_ref, "samplecharacteristics") + ElementTree.SubElement(clip_chars, "width").text = str(width) + ElementTree.SubElement(clip_chars, "height").text = str(height) + else: + ElementTree.SubElement(clip, "file", id=FILE_ID) + link = ElementTree.SubElement(clip, "link") - ElementTree.SubElement(link, "linkclipref").text = f"file{i + 1}" + ElementTree.SubElement(link, "linkclipref").text = FILE_ID ElementTree.SubElement(link, "mediatype").text = "video" pretty_xml = minidom.parseString(ElementTree.tostring(root, encoding="unicode")).toprettyxml( diff --git a/tests/test_cli.py b/tests/test_cli.py index 3f77e91f..62a70114 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1160,3 +1160,112 @@ def test_cli_save_otio_no_audio(tmp_path: Path): assert output_path.read_text() == EXPECTED_OTIO_OUTPUT.replace( "{ABSOLUTE_PATH}", os.path.abspath(DEFAULT_VIDEO_PATH).replace("\\", "\\\\") ) + + +def test_cli_save_xml_fcpx(tmp_path: Path): + """Test `save-xml --format fcpx` produces a valid FCPXML 1.9 file.""" + from xml.etree import ElementTree + + exit_code, _ = invoke_cli( + [ + "-i", + DEFAULT_VIDEO_PATH, + "-o", + str(tmp_path), + "time", + "-s", + "2s", + "-d", + "4s", + "detect-content", + "save-xml", + ] + ) + assert exit_code == 0 + output_path = tmp_path.joinpath(f"{DEFAULT_VIDEO_NAME}.xml") + assert os.path.exists(output_path) + + root = 
ElementTree.parse(output_path).getroot() + assert root.tag == "fcpxml" + assert root.attrib["version"] == "1.9" + + # Format carries the rational frameDuration derived from the video's 24000/1001 fps. + fmt = root.find("resources/format") + assert fmt is not None + assert fmt.attrib["frameDuration"] == "1001/24000s" + assert fmt.attrib["width"] == "1280" + assert fmt.attrib["height"] == "544" + + # Asset references the source video via a file:// URI. + media_rep = root.find("resources/asset/media-rep") + assert media_rep is not None + assert media_rep.attrib["src"].startswith("file://") + assert media_rep.attrib["src"].endswith("goldeneye.mp4") + + # Spine contains one `<asset-clip>` per scene (not wrapped in `<clip>`). + asset_clips = root.findall("library/event/project/sequence/spine/asset-clip") + assert len(asset_clips) == 2 + # All clip time attributes are rational strings ending in "s". + for clip in asset_clips: + for attr in ("offset", "start", "duration"): + assert clip.attrib[attr].endswith("s") + + +def test_cli_save_xml_fcp(tmp_path: Path): + """Test `save-xml --format fcp` produces a valid FCP7 xmeml file.""" + from xml.etree import ElementTree + + exit_code, _ = invoke_cli( + [ + "-i", + DEFAULT_VIDEO_PATH, + "-o", + str(tmp_path), + "time", + "-s", + "2s", + "-d", + "4s", + "detect-content", + "save-xml", + "--format", + "fcp", + ] + ) + assert exit_code == 0 + output_path = tmp_path.joinpath(f"{DEFAULT_VIDEO_NAME}.xml") + assert os.path.exists(output_path) + + root = ElementTree.parse(output_path).getroot() + assert root.tag == "xmeml" + assert root.attrib["version"] == "5" + + # NTSC flag is True for the 23.976 test video. + ntsc = root.find("project/sequence/rate/ntsc") + assert ntsc is not None and ntsc.text == "True" + + # samplecharacteristics carry width/height so Premiere/DaVinci can ingest. 
+ width = root.find("project/sequence/media/video/format/samplecharacteristics/width") + height = root.find("project/sequence/media/video/format/samplecharacteristics/height") + assert width is not None and width.text == "1280" + assert height is not None and height.text == "544" + + # Two clipitems produced; first carries the full block, rest reference it by id. + clipitems = root.findall("project/sequence/media/video/track/clipitem") + assert len(clipitems) == 2 + + first_file = clipitems[0].find("file") + assert first_file is not None + assert first_file.attrib["id"] == "file1" + pathurl = first_file.find("pathurl") + assert pathurl is not None and pathurl.text is not None + assert pathurl.text.startswith("file://") + assert pathurl.text.endswith("goldeneye.mp4") + # Source duration is required for NLEs to seek into the media. + assert first_file.find("duration") is not None + + # Subsequent clipitems reference the same file id without redeclaring. + second_file = clipitems[1].find("file") + assert second_file is not None + assert second_file.attrib["id"] == "file1" + assert second_file.find("pathurl") is None diff --git a/tests/test_vfr.py b/tests/test_vfr.py index 09aab163..c98d80e5 100644 --- a/tests/test_vfr.py +++ b/tests/test_vfr.py @@ -381,6 +381,32 @@ def test_vfr_edl_export(test_vfr_video: str, tmp_path): assert "001 AX V" in content +@pytest.mark.parametrize("xml_format", ["fcpx", "fcp"]) +def test_vfr_xml_export(test_vfr_video: str, xml_format: str, tmp_path): + """`save-xml` should succeed on VFR video and produce well-formed output in either dialect.""" + from xml.etree import ElementTree + + exit_code, _ = invoke_cli( + [ + "-i", + test_vfr_video, + "-o", + str(tmp_path), + "detect-content", + "time", + "--end", + "10s", + "save-xml", + "--format", + xml_format, + ] + ) + assert exit_code == 0 + xml_path = next(tmp_path.glob("*.xml")) + root = ElementTree.parse(xml_path).getroot() + assert root.tag == ("fcpxml" if xml_format == "fcpx" else "xmeml") 
+ + def test_vfr_csv_backend_conformance(test_vfr_video: str): """PyAV and OpenCV should produce identical scene timecodes for VFR video. diff --git a/website/pages/changelog.md b/website/pages/changelog.md index e7d61ed0..96af64d8 100644 --- a/website/pages/changelog.md +++ b/website/pages/changelog.md @@ -676,7 +676,7 @@ Although there have been minimal changes to most API examples, there are several ### CLI Changes - [feature] VFR videos are handled correctly by the OpenCV and PyAV backends, and should work correctly with default parameters -- [feature] New `save-xml` command supports saving scenes in Final Cut Pro formats [#156](https://github.com/Breakthrough/PySceneDetect/issues/156) +- [feature] New `save-xml` command allows exporting in Final Cut Pro format (FCP7/FCPX) [#156](https://github.com/Breakthrough/PySceneDetect/issues/156) - [feature] `--min-scene-len`/`-m` and `save-images --frame-margin`/`-m` now accept seconds (e.g. `0.6s`) and timecodes (e.g. `00:00:00.600`) in addition to a frame count [#531](https://github.com/Breakthrough/PySceneDetect/issues/531) - [bugfix] Fix floating-point precision error in `save-otio` output where frame values near integer boundaries (e.g. `90.00000000000001`) were serialized with spurious precision - [bugfix] Add mitigation for transient `OSError` in the MoviePy backend as it is susceptible to subprocess pipe races on slow or heavily loaded systems [#496](https://github.com/Breakthrough/PySceneDetect/issues/496) From c8386499553ebb5182d46286a11e63d672efc96b Mon Sep 17 00:00:00 2001 From: Breakthrough Date: Tue, 21 Apr 2026 22:14:39 -0400 Subject: [PATCH 2/3] [api] Add EDL, FCP7/X, and OTIO formats to scenedetect.output Rename save-xml -> save-fcp. 
--- docs/api/migration_guide.rst | 2 +- docs/api/output.rst | 8 + scenedetect.cfg | 4 +- scenedetect/_cli/__init__.py | 28 +-- scenedetect/_cli/commands.py | 393 +++++---------------------------- scenedetect/_cli/config.py | 14 +- scenedetect/output/__init__.py | 389 ++++++++++++++++++++++++++++++++ tests/test_cli.py | 14 +- tests/test_output.py | 214 ++++++++++++++++++ tests/test_vfr.py | 12 +- website/pages/changelog.md | 3 +- 11 files changed, 705 insertions(+), 376 deletions(-) diff --git a/docs/api/migration_guide.rst b/docs/api/migration_guide.rst index a3ae9fa7..e361a3a6 100644 --- a/docs/api/migration_guide.rst +++ b/docs/api/migration_guide.rst @@ -203,4 +203,4 @@ CLI Changes - The ``-d``/``--min-delta-hsv`` option on ``detect-adaptive`` has been removed. Use ``-c``/``--min-content-val`` instead. - VFR videos now work correctly with both the OpenCV and PyAV backends. -- New ``save-xml`` command for exporting scenes in Final Cut Pro XML format. +- New ``save-fcp`` command for exporting scenes in Final Cut Pro XML format. diff --git a/docs/api/output.rst b/docs/api/output.rst index 480e4371..8b594b9b 100644 --- a/docs/api/output.rst +++ b/docs/api/output.rst @@ -20,6 +20,14 @@ Ouptut .. autofunction:: scenedetect.output.write_scene_list +.. autofunction:: scenedetect.output.write_scene_list_edl + +.. autofunction:: scenedetect.output.write_scene_list_fcpx + +.. autofunction:: scenedetect.output.write_scene_list_fcp7 + +.. autofunction:: scenedetect.output.write_scene_list_otio + .. autoclass:: scenedetect.output.SceneMetadata .. autoclass:: scenedetect.output.VideoMetadata diff --git a/scenedetect.cfg b/scenedetect.cfg index 729c1074..a18189e5 100644 --- a/scenedetect.cfg +++ b/scenedetect.cfg @@ -345,14 +345,14 @@ #disable-shift = no -[save-xml] +[save-fcp] # Filename format of XML file. Can use $VIDEO_NAME macro. #filename = $VIDEO_NAME.xml # Format of the XML file. 
Must be one of: # - fcpx: Final Cut Pro X (FCPXML, default) -# - fcp: Final Cut Pro 7 (xmeml) +# - fcp7: Final Cut Pro 7 (xmeml) #format = fcpx # Folder to output XML file to. Overrides [global] output option. diff --git a/scenedetect/_cli/__init__.py b/scenedetect/_cli/__init__.py index ab169e16..59479bcd 100644 --- a/scenedetect/_cli/__init__.py +++ b/scenedetect/_cli/__init__.py @@ -1614,27 +1614,27 @@ def save_qp_command( ctx.add_command(cli_commands.save_qp, save_qp_args) -SAVE_XML_HELP = """Save cuts in Final Cut Pro XML format (FCP7 xmeml or FCPX).""" +SAVE_FCP_HELP = """Save cuts in Final Cut Pro XML format (FCP7 xmeml or FCPX).""" -@click.command("save-xml", cls=Command, help=SAVE_XML_HELP) +@click.command("save-fcp", cls=Command, help=SAVE_FCP_HELP) @click.option( "--filename", "-f", metavar="NAME", default=None, type=click.STRING, - help="Filename format to use.%s" % (USER_CONFIG.get_help_string("save-xml", "filename")), + help="Filename format to use.%s" % (USER_CONFIG.get_help_string("save-fcp", "filename")), ) @click.option( "--format", metavar="TYPE", - type=click.Choice(CHOICE_MAP["save-xml"]["format"], False), + type=click.Choice(CHOICE_MAP["save-fcp"]["format"], False), default=None, help="Format to export. TYPE must be one of: %s.%s" % ( - ", ".join(CHOICE_MAP["save-xml"]["format"]), - USER_CONFIG.get_help_string("save-xml", "format"), + ", ".join(CHOICE_MAP["save-fcp"]["format"]), + USER_CONFIG.get_help_string("save-fcp", "format"), ), ) @click.option( @@ -1643,10 +1643,10 @@ def save_qp_command( metavar="DIR", type=click.Path(exists=False, dir_okay=True, writable=True, resolve_path=False), help="Output directory to save XML file to. 
Overrides global option -o/--output.%s" - % (USER_CONFIG.get_help_string("save-xml", "output", show_default=False)), + % (USER_CONFIG.get_help_string("save-fcp", "output", show_default=False)), ) @click.pass_context -def save_xml_command( +def save_fcp_command( ctx: click.Context, filename: ty.Optional[ty.AnyStr], format: ty.Optional[ty.AnyStr], @@ -1655,12 +1655,12 @@ def save_xml_command( ctx = ctx.obj assert isinstance(ctx, CliContext) - save_xml_args = { - "filename": ctx.config.get_value("save-xml", "filename", filename), - "format": ctx.config.get_value("save-xml", "format", format), - "output": ctx.config.get_value("save-xml", "output", output), + save_fcp_args = { + "filename": ctx.config.get_value("save-fcp", "filename", filename), + "format": ctx.config.get_value("save-fcp", "format", format), + "output": ctx.config.get_value("save-fcp", "output", output), } - ctx.add_command(cli_commands.save_xml, save_xml_args) + ctx.add_command(cli_commands.save_fcp, save_fcp_args) SAVE_OTIO_HELP = """Save cuts as an OTIO timeline. @@ -1757,7 +1757,7 @@ def save_otio_command( scenedetect.add_command(save_html_command) scenedetect.add_command(save_images_command) scenedetect.add_command(save_qp_command) -scenedetect.add_command(save_xml_command) +scenedetect.add_command(save_fcp_command) scenedetect.add_command(save_otio_command) scenedetect.add_command(split_video_command) diff --git a/scenedetect/_cli/commands.py b/scenedetect/_cli/commands.py index d6f673f8..730ff277 100644 --- a/scenedetect/_cli/commands.py +++ b/scenedetect/_cli/commands.py @@ -15,27 +15,23 @@ current command-line context, as well as the processing result (scenes and cuts). 
""" -import json import logging -import os.path import typing as ty import webbrowser -from fractions import Fraction -from pathlib import Path from string import Template -from xml.dom import minidom -from xml.etree import ElementTree -import scenedetect -from scenedetect._cli.config import XmlFormat +from scenedetect._cli.config import FcpFormat from scenedetect._cli.context import CliContext -from scenedetect.common import FrameTimecode from scenedetect.output import save_images as save_images_impl from scenedetect.output import ( split_video_ffmpeg, split_video_mkvmerge, write_scene_list, + write_scene_list_edl, + write_scene_list_fcp7, + write_scene_list_fcpx, write_scene_list_html, + write_scene_list_otio, ) from scenedetect.platform import get_and_create_path from scenedetect.scene_manager import ( @@ -272,269 +268,63 @@ def save_edl( reel: str, ): """Handles the `save-edl` command. Outputs in CMX 3600 format.""" - # We only use scene information. - del cuts - - # Converts FrameTimecode to HH:MM:SS:FF - # TODO: This should be part of the FrameTimecode object itself. 
- def get_edl_timecode(timecode: FrameTimecode): - total_seconds = timecode.seconds - hours = int(total_seconds // 3600) - minutes = int((total_seconds % 3600) // 60) - seconds = int(total_seconds % 60) - frames_part = int((total_seconds * timecode.framerate) % timecode.framerate) - return f"{hours:02d}:{minutes:02d}:{seconds:02d}:{frames_part:02d}" - - edl_content = [] - - title = Template(title).safe_substitute(VIDEO_NAME=context.video_stream.name) - edl_content.append(f"TITLE: {title}") - edl_content.append("FCM: NON-DROP FRAME") - edl_content.append("") - - # Add each shot as an edit entry - for i, (start, end) in enumerate(scenes): - in_tc = get_edl_timecode(start) - out_tc = get_edl_timecode(end) # Correct for presentation time - # Format the edit entry according to CMX 3600 format - event_line = f"{(i + 1):03d} {reel} V C {in_tc} {out_tc} {in_tc} {out_tc}" - edl_content.append(event_line) - - edl_path = get_and_create_path( - Template(filename).safe_substitute(VIDEO_NAME=context.video_stream.name), - output, - ) - logger.info(f"Writing scenes in EDL format to {edl_path}") - with open(edl_path, "w") as f: - f.write(f"* CREATED WITH PYSCENEDETECT {scenedetect.__version__}\n") - f.write("\n".join(edl_content)) - f.write("\n") - - -def _rational_seconds(value: Fraction) -> str: - """Format a `Fraction` as an FCPXML rational time string. - - FCPXML expresses time as `/s` (or `s` for whole seconds). - See https://developer.apple.com/documentation/professional-video-applications/fcpxml-reference - """ - if value.denominator == 1: - return f"{value.numerator}s" - return f"{value.numerator}/{value.denominator}s" - - -def _frame_timecode_seconds(tc: FrameTimecode) -> Fraction: - """Exact seconds for `tc` as a `Fraction`, derived from PTS × time base.""" - return Fraction(tc.pts) * tc.time_base - - -def _save_xml_fcpx( - context: CliContext, - scenes: SceneList, - filename: str, - output: str, -): - """Saves scenes in Final Cut Pro X XML format (FCPXML 1.9). 
- - The output follows Apple's FCPXML schema with rational-second time values and - a custom `` derived from the source video's frame rate and resolution. - See https://developer.apple.com/documentation/professional-video-applications/fcpxml-reference - """ - ASSET_ID = "r2" - FORMAT_ID = "r1" - - frame_rate = context.video_stream.frame_rate - frame_duration = _rational_seconds(Fraction(frame_rate.denominator, frame_rate.numerator)) - width, height = context.video_stream.frame_size + del cuts # We only use scene information. video_name = context.video_stream.name - src_uri = Path(context.video_stream.path).absolute().as_uri() - total_duration = _rational_seconds(_frame_timecode_seconds(scenes[-1][1] - scenes[0][0])) - - root = ElementTree.Element("fcpxml", version="1.9") - resources = ElementTree.SubElement(root, "resources") - # `name` is cosmetic: Apple publishes no authoritative FFVideoFormat* list, and editors key - # off frameDuration/width/height. We emit a generated name for display only. 
- format_name = f"FFVideoFormat{height}p{round(float(frame_rate) * 100):04d}" - ElementTree.SubElement( - resources, - "format", - id=FORMAT_ID, - name=format_name, - frameDuration=frame_duration, - width=str(width), - height=str(height), - ) - asset = ElementTree.SubElement( - resources, - "asset", - id=ASSET_ID, - name=video_name, - start="0s", - duration=total_duration, - hasVideo="1", - format=FORMAT_ID, - ) - ElementTree.SubElement(asset, "media-rep", kind="original-media", src=src_uri) - - library = ElementTree.SubElement(root, "library") - event = ElementTree.SubElement(library, "event", name=video_name) - project = ElementTree.SubElement(event, "project", name=video_name) - sequence = ElementTree.SubElement( - project, "sequence", format=FORMAT_ID, duration=total_duration, tcStart="0s", tcFormat="NDF" - ) - spine = ElementTree.SubElement(sequence, "spine") - - for i, (start, end) in enumerate(scenes): - scene_start = _rational_seconds(_frame_timecode_seconds(start)) - scene_duration = _rational_seconds(_frame_timecode_seconds(end - start)) - ElementTree.SubElement( - spine, - "asset-clip", - name=f"Shot {i + 1}", - ref=ASSET_ID, - offset=scene_start, - start=scene_start, - duration=scene_duration, - ) - - pretty_xml = minidom.parseString(ElementTree.tostring(root, encoding="unicode")).toprettyxml( - indent=" " - ) - xml_path = get_and_create_path( - Template(filename).safe_substitute(VIDEO_NAME=context.video_stream.name), + edl_path = get_and_create_path( + Template(filename).safe_substitute(VIDEO_NAME=video_name), output, ) - logger.info(f"Writing scenes in FCPX format to {xml_path}") - with open(xml_path, "w") as f: - f.write(pretty_xml) - - -def _save_xml_fcp( - context: CliContext, - scenes: SceneList, - filename: str, - output: str, -): - """Saves scenes in Final Cut Pro 7 XML (xmeml) format. - - See https://developer.apple.com/library/archive/documentation/AppleApplications/Reference/FinalCutPro_XML/ - for the element reference. 
`pathurl` must be a valid `file://` URI per the xmeml spec. - """ - assert scenes - root = ElementTree.Element("xmeml", version="5") - project = ElementTree.SubElement(root, "project") - ElementTree.SubElement(project, "name").text = context.video_stream.name - sequence = ElementTree.SubElement(project, "sequence") - ElementTree.SubElement(sequence, "name").text = context.video_stream.name - - fps = float(context.video_stream.frame_rate) - ntsc = "True" if context.video_stream.frame_rate.denominator != 1 else "False" - duration = scenes[-1][1] - scenes[0][0] - ElementTree.SubElement(sequence, "duration").text = str(round(duration.seconds * fps)) - - rate = ElementTree.SubElement(sequence, "rate") - ElementTree.SubElement(rate, "timebase").text = str(round(fps)) - ElementTree.SubElement(rate, "ntsc").text = ntsc - - timecode = ElementTree.SubElement(sequence, "timecode") - tc_rate = ElementTree.SubElement(timecode, "rate") - ElementTree.SubElement(tc_rate, "timebase").text = str(round(fps)) - ElementTree.SubElement(tc_rate, "ntsc").text = ntsc - ElementTree.SubElement(timecode, "frame").text = "0" - ElementTree.SubElement(timecode, "displayformat").text = "NDF" - - width, height = context.video_stream.frame_size - media = ElementTree.SubElement(sequence, "media") - video = ElementTree.SubElement(media, "video") - format = ElementTree.SubElement(video, "format") - sample_chars = ElementTree.SubElement(format, "samplecharacteristics") - ElementTree.SubElement(sample_chars, "width").text = str(width) - ElementTree.SubElement(sample_chars, "height").text = str(height) - track = ElementTree.SubElement(video, "track") - - path_uri = Path(context.video_stream.path).absolute().as_uri() - # Source media total duration in frames at the declared timebase. Required on `` so NLEs - # (DaVinci Resolve, Premiere) can seek into the source — without it the clip plays frozen. 
- source_duration_frames = ( - str(round(context.video_stream.duration.seconds * fps)) - if context.video_stream.duration is not None - else str(round(scenes[-1][1].seconds * fps)) - ) - FILE_ID = "file1" - - for i, (start, end) in enumerate(scenes): - clip = ElementTree.SubElement(track, "clipitem") - ElementTree.SubElement(clip, "name").text = f"Shot {i + 1}" - ElementTree.SubElement(clip, "enabled").text = "TRUE" - ElementTree.SubElement(clip, "duration").text = source_duration_frames - clip_rate = ElementTree.SubElement(clip, "rate") - ElementTree.SubElement(clip_rate, "timebase").text = str(round(fps)) - ElementTree.SubElement(clip_rate, "ntsc").text = ntsc - # Frame numbers relative to the declared fps, computed from PTS seconds. - ElementTree.SubElement(clip, "start").text = str(round(start.seconds * fps)) - ElementTree.SubElement(clip, "end").text = str(round(end.seconds * fps)) - ElementTree.SubElement(clip, "in").text = str(round(start.seconds * fps)) - ElementTree.SubElement(clip, "out").text = str(round(end.seconds * fps)) - - # xmeml allows a single full `<file id="...">` declaration reused via `<file id="..."/>` on - # subsequent clipitems. Emit full details on the first, then self-close on the rest. 
- if i == 0: - file_ref = ElementTree.SubElement(clip, "file", id=FILE_ID) - ElementTree.SubElement(file_ref, "name").text = context.video_stream.name - ElementTree.SubElement(file_ref, "pathurl").text = path_uri - ElementTree.SubElement(file_ref, "duration").text = source_duration_frames - file_rate = ElementTree.SubElement(file_ref, "rate") - ElementTree.SubElement(file_rate, "timebase").text = str(round(fps)) - ElementTree.SubElement(file_rate, "ntsc").text = ntsc - media_ref = ElementTree.SubElement(file_ref, "media") - video_ref = ElementTree.SubElement(media_ref, "video") - clip_chars = ElementTree.SubElement(video_ref, "samplecharacteristics") - ElementTree.SubElement(clip_chars, "width").text = str(width) - ElementTree.SubElement(clip_chars, "height").text = str(height) - else: - ElementTree.SubElement(clip, "file", id=FILE_ID) - - link = ElementTree.SubElement(clip, "link") - ElementTree.SubElement(link, "linkclipref").text = FILE_ID - ElementTree.SubElement(link, "mediatype").text = "video" - - pretty_xml = minidom.parseString(ElementTree.tostring(root, encoding="unicode")).toprettyxml( - indent=" " - ) - xml_path = get_and_create_path( - Template(filename).safe_substitute(VIDEO_NAME=context.video_stream.name), - output, + write_scene_list_edl( + output_path=edl_path, + scene_list=scenes, + title=Template(title).safe_substitute(VIDEO_NAME=video_name), + reel=reel, ) - logger.info(f"Writing scenes in FCP format to {xml_path}") - with open(xml_path, "w") as f: - f.write(pretty_xml) -def save_xml( +def save_fcp( context: CliContext, scenes: SceneList, cuts: CutList, filename: str, - format: XmlFormat, + format: FcpFormat, output: str, ): - """Handles the `save-xml` command.""" - # We only use scene information. - del cuts - + """Handles the `save-fcp` command.""" + del cuts # We only use scene information. 
if not scenes: return - if format == XmlFormat.FCPX: - _save_xml_fcpx(context, scenes, filename, output) - elif format == XmlFormat.FCP: - _save_xml_fcp(context, scenes, filename, output) + video_stream = context.video_stream + video_name = str(video_stream.name) + video_path = str(video_stream.path) + xml_path = get_and_create_path( + Template(filename).safe_substitute(VIDEO_NAME=video_name), + output, + ) + if format == FcpFormat.FCPX: + write_scene_list_fcpx( + output_path=xml_path, + scene_list=scenes, + video_path=video_path, + frame_rate=video_stream.frame_rate, + frame_size=video_stream.frame_size, + video_name=video_name, + ) + elif format == FcpFormat.FCP7: + write_scene_list_fcp7( + output_path=xml_path, + scene_list=scenes, + video_path=video_path, + frame_rate=video_stream.frame_rate, + frame_size=video_stream.frame_size, + video_name=video_name, + source_duration=video_stream.duration, + ) else: logger.error(f"Unknown format: {format}") -# TODO: We have to export framerate as a float for OTIO's current format. When OTIO supports -# fractional timecodes, we should export the framerate as a rational number instead. -# https://github.com/AcademySoftwareFoundation/OpenTimelineIO/issues/190 def save_otio( context: CliContext, scenes: SceneList, @@ -544,92 +334,19 @@ def save_otio( name: str, audio: bool, ): - """Saves scenes in OTIO format.""" - + """Handles the `save-otio` command.""" del cuts # We only use scene information - - video_name = context.video_stream.name - video_path = os.path.abspath(context.video_stream.path) - video_base_name = os.path.basename(context.video_stream.path) - frame_rate = float(context.video_stream.frame_rate) - - # List of track mapping to resource type. - # TODO(https://scenedetect.com/issues/497): Allow OTIO export without an audio track. 
- track_list = {"Video 1": "Video"} - if audio: - track_list["Audio 1"] = "Audio" - - otio = { - "OTIO_SCHEMA": "Timeline.1", - "name": Template(name).safe_substitute(VIDEO_NAME=video_name), - "global_start_time": { - "OTIO_SCHEMA": "RationalTime.1", - "rate": frame_rate, - "value": 0.0, - }, - "tracks": { - "OTIO_SCHEMA": "Stack.1", - "enabled": True, - "children": [ - { - "OTIO_SCHEMA": "Track.1", - "name": track_name, - "enabled": True, - "children": [ - { - "OTIO_SCHEMA": "Clip.2", - "name": video_base_name, - "source_range": { - "OTIO_SCHEMA": "TimeRange.1", - "duration": { - "OTIO_SCHEMA": "RationalTime.1", - "rate": frame_rate, - "value": round((end - start).seconds * frame_rate, 6), - }, - "start_time": { - "OTIO_SCHEMA": "RationalTime.1", - "rate": frame_rate, - "value": round(start.seconds * frame_rate, 6), - }, - }, - "enabled": True, - "media_references": { - "DEFAULT_MEDIA": { - "OTIO_SCHEMA": "ExternalReference.1", - "name": video_base_name, - "available_range": { - "OTIO_SCHEMA": "TimeRange.1", - "duration": { - "OTIO_SCHEMA": "RationalTime.1", - "rate": frame_rate, - "value": 1980.0, - }, - "start_time": { - "OTIO_SCHEMA": "RationalTime.1", - "rate": frame_rate, - "value": 0.0, - }, - }, - "available_image_bounds": None, - "target_url": video_path, - } - }, - "active_media_reference_key": "DEFAULT_MEDIA", - } - for (start, end) in scenes - ], - "kind": track_type, - } - for (track_name, track_type) in track_list.items() - ], - }, - } - + video_stream = context.video_stream + video_name = str(video_stream.name) otio_path = get_and_create_path( - Template(filename).safe_substitute(VIDEO_NAME=context.video_stream.name), + Template(filename).safe_substitute(VIDEO_NAME=video_name), output, ) - logger.info(f"Writing scenes in OTIO format to {otio_path}") - with open(otio_path, "w") as f: - json.dump(otio, f, indent=4) - f.write("\n") + write_scene_list_otio( + output_path=otio_path, + scene_list=scenes, + video_path=str(video_stream.path), + 
frame_rate=video_stream.frame_rate, + name=Template(name).safe_substitute(VIDEO_NAME=video_name), + audio=audio, + ) diff --git a/scenedetect/_cli/config.py b/scenedetect/_cli/config.py index 80fc082f..1bf8dad0 100644 --- a/scenedetect/_cli/config.py +++ b/scenedetect/_cli/config.py @@ -305,12 +305,12 @@ def format(self, timecode: FrameTimecode) -> str: raise RuntimeError("Unhandled format specifier.") -class XmlFormat(Enum): - """Format to use with the `save-xml` command.""" +class FcpFormat(Enum): + """Format to use with the `save-fcp` command.""" FCPX = 0 """Final Cut Pro X XML Format""" - FCP = 1 + FCP7 = 1 """Final Cut Pro 7 XML Format""" @@ -433,8 +433,8 @@ class XmlFormat(Enum): "filename": "$VIDEO_NAME.qp", "output": None, }, - "save-xml": { - "format": XmlFormat.FCPX, + "save-fcp": { + "format": FcpFormat.FCPX, "filename": "$VIDEO_NAME.xml", "output": None, }, @@ -480,8 +480,8 @@ class XmlFormat(Enum): "format": ["jpeg", "png", "webp"], "scale-method": [value.name.lower() for value in Interpolation], }, - "save-xml": { - "format": [value.name.lower() for value in XmlFormat], + "save-fcp": { + "format": [value.name.lower() for value in FcpFormat], }, "split-video": { "preset": [ diff --git a/scenedetect/output/__init__.py b/scenedetect/output/__init__.py index 3acd48f8..b913933f 100644 --- a/scenedetect/output/__init__.py +++ b/scenedetect/output/__init__.py @@ -16,8 +16,13 @@ """ import csv +import json import logging import typing as ty +from fractions import Fraction +from pathlib import Path +from xml.dom import minidom +from xml.etree import ElementTree from scenedetect._thirdparty.simpletable import ( HTMLPage, @@ -28,6 +33,7 @@ ) from scenedetect.common import ( CutList, + FrameTimecode, SceneList, ) @@ -233,3 +239,386 @@ def write_scene_list_html( page.add_table(scene_table) page.css = css page.save(output_html_filename) + + +def _edl_timecode(timecode: FrameTimecode) -> str: + """Format `timecode` as ``HH:MM:SS:FF`` for a CMX 3600 EDL entry.""" + 
total_seconds = timecode.seconds + hours = int(total_seconds // 3600) + minutes = int((total_seconds % 3600) // 60) + seconds = int(total_seconds % 60) + frames_part = int((total_seconds * timecode.framerate) % timecode.framerate) + return f"{hours:02d}:{minutes:02d}:{seconds:02d}:{frames_part:02d}" + + +def write_scene_list_edl( + output_path: ty.Union[str, Path], + scene_list: SceneList, + title: str = "PySceneDetect", + reel: str = "AX", +): + """Writes the given list of scenes to `output_path` in CMX 3600 EDL format. + + Arguments: + output_path: Path to write the EDL file to. Parent directories must exist. + scene_list: List of scenes as pairs of FrameTimecodes denoting each scene's start/end. + title: Title header written as ``TITLE:`` in the EDL. + reel: Reel name used for each event. Typically 2-8 uppercase characters. + """ + output_path = Path(output_path) + lines = [f"TITLE: {title}", "FCM: NON-DROP FRAME", ""] + for i, (start, end) in enumerate(scene_list): + in_tc = _edl_timecode(start) + out_tc = _edl_timecode(end) + lines.append(f"{(i + 1):03d} {reel} V C {in_tc} {out_tc} {in_tc} {out_tc}") + logger.info("Writing scenes in EDL format to %s", output_path) + with open(output_path, "w") as f: + # `scenedetect` is imported lazily to avoid a circular import at module load. + import scenedetect + + f.write(f"* CREATED WITH PYSCENEDETECT {scenedetect.__version__}\n") + f.write("\n".join(lines)) + f.write("\n") + + +def _rational_seconds(value: Fraction) -> str: + """Format a `Fraction` as an FCPXML rational time string. + + FCPXML expresses time as ``/s`` (or ``s`` for whole seconds). 
See + https://developer.apple.com/documentation/professional-video-applications/fcpxml-reference + """ + if value.denominator == 1: + return f"{value.numerator}s" + return f"{value.numerator}/{value.denominator}s" + + +def _frame_timecode_seconds(tc: FrameTimecode) -> Fraction: + """Exact seconds for `tc` as a `Fraction`, derived from PTS × time base.""" + return Fraction(tc.pts) * tc.time_base + + +def write_scene_list_fcpx( + output_path: ty.Union[str, Path], + scene_list: SceneList, + video_path: ty.Union[str, Path], + frame_rate: Fraction, + frame_size: ty.Tuple[int, int], + video_name: ty.Optional[str] = None, +): + """Writes the given list of scenes to `output_path` in Final Cut Pro X XML format (FCPXML 1.9). + + The output follows Apple's FCPXML schema with rational-second time values and a custom + ```` derived from the source video's frame rate and resolution. See + https://developer.apple.com/documentation/professional-video-applications/fcpxml-reference + + Arguments: + output_path: Path to write the FCPXML file to. Parent directories must exist. + scene_list: List of scenes as pairs of FrameTimecodes. Must not be empty. + video_path: Path to the source video file; written into the output as a ``file://`` URI. + frame_rate: Source frame rate as a rational `Fraction` (e.g. ``Fraction(24000, 1001)``). + frame_size: Source resolution as a ``(width, height)`` tuple in pixels. + video_name: Display name used for the asset, project, and event. Defaults to the stem + of `video_path`. 
+ """ + assert scene_list + output_path = Path(output_path) + video_path = Path(video_path) + if video_name is None: + video_name = video_path.stem + + ASSET_ID = "r2" + FORMAT_ID = "r1" + + width, height = frame_size + frame_duration = _rational_seconds(Fraction(frame_rate.denominator, frame_rate.numerator)) + src_uri = video_path.absolute().as_uri() + total_duration = _rational_seconds( + _frame_timecode_seconds(scene_list[-1][1] - scene_list[0][0]) + ) + + root = ElementTree.Element("fcpxml", version="1.9") + resources = ElementTree.SubElement(root, "resources") + # `name` is cosmetic: Apple publishes no authoritative FFVideoFormat* list, and editors key + # off frameDuration/width/height. We emit a generated name for display only. + format_name = f"FFVideoFormat{height}p{round(float(frame_rate) * 100):04d}" + ElementTree.SubElement( + resources, + "format", + id=FORMAT_ID, + name=format_name, + frameDuration=frame_duration, + width=str(width), + height=str(height), + ) + asset = ElementTree.SubElement( + resources, + "asset", + id=ASSET_ID, + name=video_name, + start="0s", + duration=total_duration, + hasVideo="1", + format=FORMAT_ID, + ) + ElementTree.SubElement(asset, "media-rep", kind="original-media", src=src_uri) + + library = ElementTree.SubElement(root, "library") + event = ElementTree.SubElement(library, "event", name=video_name) + project = ElementTree.SubElement(event, "project", name=video_name) + sequence = ElementTree.SubElement( + project, + "sequence", + format=FORMAT_ID, + duration=total_duration, + tcStart="0s", + tcFormat="NDF", + ) + spine = ElementTree.SubElement(sequence, "spine") + + for i, (start, end) in enumerate(scene_list): + scene_start = _rational_seconds(_frame_timecode_seconds(start)) + scene_duration = _rational_seconds(_frame_timecode_seconds(end - start)) + ElementTree.SubElement( + spine, + "asset-clip", + name=f"Shot {i + 1}", + ref=ASSET_ID, + offset=scene_start, + start=scene_start, + duration=scene_duration, + ) + + 
pretty_xml = minidom.parseString(ElementTree.tostring(root, encoding="unicode")).toprettyxml( + indent=" " + ) + logger.info("Writing scenes in FCPX format to %s", output_path) + with open(output_path, "w") as f: + f.write(pretty_xml) + + +def write_scene_list_fcp7( + output_path: ty.Union[str, Path], + scene_list: SceneList, + video_path: ty.Union[str, Path], + frame_rate: Fraction, + frame_size: ty.Tuple[int, int], + video_name: ty.Optional[str] = None, + source_duration: ty.Optional[FrameTimecode] = None, +): + """Writes the given list of scenes to `output_path` in Final Cut Pro 7 XML (xmeml) format. + + See the xmeml element reference at + https://developer.apple.com/library/archive/documentation/AppleApplications/Reference/FinalCutPro_XML/. + ``pathurl`` is written as a valid ``file://`` URI per the xmeml spec. + + Arguments: + output_path: Path to write the xmeml file to. Parent directories must exist. + scene_list: List of scenes as pairs of FrameTimecodes. Must not be empty. + video_path: Path to the source video file; written into the output as a ``file://`` URI. + frame_rate: Source frame rate as a rational `Fraction`. + frame_size: Source resolution as a ``(width, height)`` tuple in pixels. + video_name: Display name used for project and sequence. Defaults to the stem of + `video_path`. + source_duration: Total duration of the source media. Required on ```` so NLEs + (DaVinci Resolve, Premiere) can seek into the source — without it the clip plays + frozen. If None, falls back to the last scene's end time. 
+ """ + assert scene_list + output_path = Path(output_path) + video_path = Path(video_path) + if video_name is None: + video_name = video_path.stem + + root = ElementTree.Element("xmeml", version="5") + project = ElementTree.SubElement(root, "project") + ElementTree.SubElement(project, "name").text = video_name + sequence = ElementTree.SubElement(project, "sequence") + ElementTree.SubElement(sequence, "name").text = video_name + + fps = float(frame_rate) + ntsc = "True" if frame_rate.denominator != 1 else "False" + duration = scene_list[-1][1] - scene_list[0][0] + ElementTree.SubElement(sequence, "duration").text = str(round(duration.seconds * fps)) + + rate = ElementTree.SubElement(sequence, "rate") + ElementTree.SubElement(rate, "timebase").text = str(round(fps)) + ElementTree.SubElement(rate, "ntsc").text = ntsc + + timecode = ElementTree.SubElement(sequence, "timecode") + tc_rate = ElementTree.SubElement(timecode, "rate") + ElementTree.SubElement(tc_rate, "timebase").text = str(round(fps)) + ElementTree.SubElement(tc_rate, "ntsc").text = ntsc + ElementTree.SubElement(timecode, "frame").text = "0" + ElementTree.SubElement(timecode, "displayformat").text = "NDF" + + width, height = frame_size + media = ElementTree.SubElement(sequence, "media") + video = ElementTree.SubElement(media, "video") + format = ElementTree.SubElement(video, "format") + sample_chars = ElementTree.SubElement(format, "samplecharacteristics") + ElementTree.SubElement(sample_chars, "width").text = str(width) + ElementTree.SubElement(sample_chars, "height").text = str(height) + track = ElementTree.SubElement(video, "track") + + path_uri = video_path.absolute().as_uri() + source_duration_frames = str( + round( + (source_duration.seconds if source_duration is not None else scene_list[-1][1].seconds) + * fps + ) + ) + FILE_ID = "file1" + + for i, (start, end) in enumerate(scene_list): + clip = ElementTree.SubElement(track, "clipitem") + ElementTree.SubElement(clip, "name").text = f"Shot {i + 1}" + 
ElementTree.SubElement(clip, "enabled").text = "TRUE" + ElementTree.SubElement(clip, "duration").text = source_duration_frames + clip_rate = ElementTree.SubElement(clip, "rate") + ElementTree.SubElement(clip_rate, "timebase").text = str(round(fps)) + ElementTree.SubElement(clip_rate, "ntsc").text = ntsc + # Frame numbers relative to the declared fps, computed from PTS seconds. + ElementTree.SubElement(clip, "start").text = str(round(start.seconds * fps)) + ElementTree.SubElement(clip, "end").text = str(round(end.seconds * fps)) + ElementTree.SubElement(clip, "in").text = str(round(start.seconds * fps)) + ElementTree.SubElement(clip, "out").text = str(round(end.seconds * fps)) + + # xmeml allows a single full `` declaration reused via `` on + # subsequent clipitems. Emit full details on the first, then self-close on the rest. + if i == 0: + file_ref = ElementTree.SubElement(clip, "file", id=FILE_ID) + ElementTree.SubElement(file_ref, "name").text = video_name + ElementTree.SubElement(file_ref, "pathurl").text = path_uri + ElementTree.SubElement(file_ref, "duration").text = source_duration_frames + file_rate = ElementTree.SubElement(file_ref, "rate") + ElementTree.SubElement(file_rate, "timebase").text = str(round(fps)) + ElementTree.SubElement(file_rate, "ntsc").text = ntsc + media_ref = ElementTree.SubElement(file_ref, "media") + video_ref = ElementTree.SubElement(media_ref, "video") + clip_chars = ElementTree.SubElement(video_ref, "samplecharacteristics") + ElementTree.SubElement(clip_chars, "width").text = str(width) + ElementTree.SubElement(clip_chars, "height").text = str(height) + else: + ElementTree.SubElement(clip, "file", id=FILE_ID) + + link = ElementTree.SubElement(clip, "link") + ElementTree.SubElement(link, "linkclipref").text = FILE_ID + ElementTree.SubElement(link, "mediatype").text = "video" + + pretty_xml = minidom.parseString(ElementTree.tostring(root, encoding="unicode")).toprettyxml( + indent=" " + ) + logger.info("Writing scenes in FCP format to 
%s", output_path) + with open(output_path, "w") as f: + f.write(pretty_xml) + + +# TODO: We have to export framerate as a float for OTIO's current format. When OTIO supports +# fractional timecodes, we should export the framerate as a rational number instead. +# https://github.com/AcademySoftwareFoundation/OpenTimelineIO/issues/190 +def write_scene_list_otio( + output_path: ty.Union[str, Path], + scene_list: SceneList, + video_path: ty.Union[str, Path], + frame_rate: Fraction, + name: ty.Optional[str] = None, + audio: bool = True, +): + """Writes the given list of scenes to `output_path` as an OTIO Timeline.1 JSON document. + + OTIO (OpenTimelineIO) timelines can be imported by many video editors. + + Arguments: + output_path: Path to write the OTIO file to. Parent directories must exist. + scene_list: List of scenes as pairs of FrameTimecodes. + video_path: Path to the source video file; written into the output as an absolute path. + frame_rate: Source frame rate as a rational `Fraction`. Exported as a float, as the + current OTIO format does not support rational timings. + name: Timeline name. Defaults to the stem of `video_path`. + audio: If True (default), include an audio track alongside the video track. + """ + output_path = Path(output_path) + video_path = Path(video_path) + if name is None: + name = video_path.stem + + video_base_name = video_path.name + video_abs_path = str(video_path.absolute()) + fps = float(frame_rate) + + # List of track mapping to resource type. + # TODO(https://scenedetect.com/issues/497): Allow OTIO export without an audio track. 
+ track_list = {"Video 1": "Video"} + if audio: + track_list["Audio 1"] = "Audio" + + otio = { + "OTIO_SCHEMA": "Timeline.1", + "name": name, + "global_start_time": { + "OTIO_SCHEMA": "RationalTime.1", + "rate": fps, + "value": 0.0, + }, + "tracks": { + "OTIO_SCHEMA": "Stack.1", + "enabled": True, + "children": [ + { + "OTIO_SCHEMA": "Track.1", + "name": track_name, + "enabled": True, + "children": [ + { + "OTIO_SCHEMA": "Clip.2", + "name": video_base_name, + "source_range": { + "OTIO_SCHEMA": "TimeRange.1", + "duration": { + "OTIO_SCHEMA": "RationalTime.1", + "rate": fps, + "value": round((end - start).seconds * fps, 6), + }, + "start_time": { + "OTIO_SCHEMA": "RationalTime.1", + "rate": fps, + "value": round(start.seconds * fps, 6), + }, + }, + "enabled": True, + "media_references": { + "DEFAULT_MEDIA": { + "OTIO_SCHEMA": "ExternalReference.1", + "name": video_base_name, + "available_range": { + "OTIO_SCHEMA": "TimeRange.1", + "duration": { + "OTIO_SCHEMA": "RationalTime.1", + "rate": fps, + "value": 1980.0, + }, + "start_time": { + "OTIO_SCHEMA": "RationalTime.1", + "rate": fps, + "value": 0.0, + }, + }, + "available_image_bounds": None, + "target_url": video_abs_path, + } + }, + "active_media_reference_key": "DEFAULT_MEDIA", + } + for (start, end) in scene_list + ], + "kind": track_type, + } + for (track_name, track_type) in track_list.items() + ], + }, + } + + logger.info("Writing scenes in OTIO format to %s", output_path) + with open(output_path, "w") as f: + json.dump(otio, f, indent=4) + f.write("\n") diff --git a/tests/test_cli.py b/tests/test_cli.py index 62a70114..78841978 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1162,8 +1162,8 @@ def test_cli_save_otio_no_audio(tmp_path: Path): ) -def test_cli_save_xml_fcpx(tmp_path: Path): - """Test `save-xml --format fcpx` produces a valid FCPXML 1.9 file.""" +def test_cli_save_fcp_fcpx(tmp_path: Path): + """Test `save-fcp --format fcpx` produces a valid FCPXML 1.9 file.""" from xml.etree import 
ElementTree exit_code, _ = invoke_cli( @@ -1178,7 +1178,7 @@ def test_cli_save_xml_fcpx(tmp_path: Path): "-d", "4s", "detect-content", - "save-xml", + "save-fcp", ] ) assert exit_code == 0 @@ -1211,8 +1211,8 @@ def test_cli_save_xml_fcpx(tmp_path: Path): assert clip.attrib[attr].endswith("s") -def test_cli_save_xml_fcp(tmp_path: Path): - """Test `save-xml --format fcp` produces a valid FCP7 xmeml file.""" +def test_cli_save_fcp_fcp7(tmp_path: Path): + """Test `save-fcp --format fcp7` produces a valid FCP7 xmeml file.""" from xml.etree import ElementTree exit_code, _ = invoke_cli( @@ -1227,9 +1227,9 @@ def test_cli_save_xml_fcp(tmp_path: Path): "-d", "4s", "detect-content", - "save-xml", + "save-fcp", "--format", - "fcp", + "fcp7", ] ) assert exit_code == 0 diff --git a/tests/test_output.py b/tests/test_output.py index 3936f5e8..d3efe182 100644 --- a/tests/test_output.py +++ b/tests/test_output.py @@ -11,7 +11,10 @@ # """Tests for scenedetect.output module.""" +import json +from fractions import Fraction from pathlib import Path +from xml.etree import ElementTree import pytest @@ -28,6 +31,10 @@ VideoMetadata, is_ffmpeg_available, split_video_ffmpeg, + write_scene_list_edl, + write_scene_list_fcp7, + write_scene_list_fcpx, + write_scene_list_otio, ) FFMPEG_ARGS = ( @@ -223,3 +230,210 @@ def test_save_images_zero_width_scene(test_video_file, tmp_path: Path): total_images += 1 assert total_images == len([path for path in tmp_path.glob(image_name_glob)]) + + +# +# Scene-list export API (EDL / FCPXML / FCP7 xmeml / OTIO) +# +# These tests construct small synthetic scene lists so they do not require video +# decoding and stay fast. They assert the structural invariants each format must +# hold (e.g. rational time strings for FCPXML, `file://` URIs for xmeml, OTIO +# Clip.2 count matching scene count). 
+ +_FPS_NTSC = Fraction(24000, 1001) +_FPS_CFR = Fraction(30, 1) + + +def _fake_scenes(fps: Fraction, frames): + return [(FrameTimecode(start, fps=fps), FrameTimecode(end, fps=fps)) for start, end in frames] + + +def test_write_scene_list_edl(tmp_path: Path): + """EDL output has title header, FCM line, and one event per scene in CMX 3600 format.""" + scenes = _fake_scenes(_FPS_CFR, [(0, 30), (30, 60)]) + output_path = tmp_path / "scenes.edl" + write_scene_list_edl(output_path, scenes, title="my-clip", reel="AX") + + content = output_path.read_text() + assert "TITLE: my-clip" in content + assert "FCM: NON-DROP FRAME" in content + assert "001 AX V C 00:00:00:00 00:00:01:00 00:00:00:00 00:00:01:00" in content + assert "002 AX V C 00:00:01:00 00:00:02:00 00:00:01:00 00:00:02:00" in content + + +def test_write_scene_list_edl_accepts_str_path(tmp_path: Path): + """`output_path` must accept both Path and str.""" + scenes = _fake_scenes(_FPS_CFR, [(0, 30)]) + output_path = tmp_path / "scenes.edl" + write_scene_list_edl(str(output_path), scenes) + assert output_path.exists() + + +def test_write_scene_list_fcpx(tmp_path: Path): + """FCPXML output declares version 1.9, rational time strings, and an asset-clip per scene.""" + scenes = _fake_scenes(_FPS_NTSC, [(48, 96), (96, 144)]) + output_path = tmp_path / "scenes.xml" + # `video_path` need not exist; only `.absolute().as_uri()` is called on it. + write_scene_list_fcpx( + output_path=output_path, + scene_list=scenes, + video_path=tmp_path / "fake_video.mp4", + frame_rate=_FPS_NTSC, + frame_size=(1280, 544), + ) + + root = ElementTree.parse(output_path).getroot() + assert root.tag == "fcpxml" + assert root.attrib["version"] == "1.9" + + fmt = root.find("resources/format") + assert fmt is not None + # 24000/1001 fps → frameDuration is the reciprocal: 1001/24000s. 
+ assert fmt.attrib["frameDuration"] == "1001/24000s" + assert fmt.attrib["width"] == "1280" + assert fmt.attrib["height"] == "544" + + media_rep = root.find("resources/asset/media-rep") + assert media_rep is not None + assert media_rep.attrib["src"].startswith("file://") + + clips = root.findall("library/event/project/sequence/spine/asset-clip") + assert len(clips) == 2 + for clip in clips: + for attr in ("offset", "start", "duration"): + assert clip.attrib[attr].endswith("s") + + +def test_write_scene_list_fcpx_video_name_defaults_to_path_stem(tmp_path: Path): + """Omitting `video_name` falls back to the stem of `video_path`.""" + scenes = _fake_scenes(_FPS_NTSC, [(0, 24)]) + output_path = tmp_path / "scenes.xml" + write_scene_list_fcpx( + output_path=output_path, + scene_list=scenes, + video_path=tmp_path / "my_clip.mp4", + frame_rate=_FPS_NTSC, + frame_size=(640, 360), + ) + root = ElementTree.parse(output_path).getroot() + asset = root.find("resources/asset") + assert asset is not None and asset.attrib["name"] == "my_clip" + + +def test_write_scene_list_fcp7(tmp_path: Path): + """FCP7 xmeml declares version 5, a clipitem per scene, and a shared reference.""" + scenes = _fake_scenes(_FPS_NTSC, [(0, 48), (48, 96)]) + output_path = tmp_path / "scenes.xml" + write_scene_list_fcp7( + output_path=output_path, + scene_list=scenes, + video_path=tmp_path / "source.mp4", + frame_rate=_FPS_NTSC, + frame_size=(1920, 1080), + source_duration=FrameTimecode(240, fps=_FPS_NTSC), + ) + + root = ElementTree.parse(output_path).getroot() + assert root.tag == "xmeml" + assert root.attrib["version"] == "5" + + ntsc = root.find("project/sequence/rate/ntsc") + assert ntsc is not None and ntsc.text == "True" + + clipitems = root.findall("project/sequence/media/video/track/clipitem") + assert len(clipitems) == 2 + # First clipitem carries the full declaration; later ones reference it by id. 
+ first_file = clipitems[0].find("file") + assert first_file is not None and first_file.attrib["id"] == "file1" + pathurl = first_file.find("pathurl") + assert pathurl is not None and pathurl.text is not None + assert pathurl.text.startswith("file://") + assert first_file.find("duration") is not None + second_file = clipitems[1].find("file") + assert second_file is not None and second_file.attrib["id"] == "file1" + assert second_file.find("pathurl") is None + + +def test_write_scene_list_fcp7_cfr_sets_ntsc_false(tmp_path: Path): + """Integer frame rates (denominator == 1) must set ntsc="False".""" + scenes = _fake_scenes(_FPS_CFR, [(0, 30)]) + output_path = tmp_path / "scenes.xml" + write_scene_list_fcp7( + output_path=output_path, + scene_list=scenes, + video_path=tmp_path / "source.mp4", + frame_rate=_FPS_CFR, + frame_size=(640, 360), + ) + root = ElementTree.parse(output_path).getroot() + ntsc = root.find("project/sequence/rate/ntsc") + assert ntsc is not None and ntsc.text == "False" + + +def test_write_scene_list_otio(tmp_path: Path): + """OTIO output is valid JSON with a Timeline.1 schema and one Clip.2 per scene per track.""" + scenes = _fake_scenes(_FPS_NTSC, [(24, 72), (72, 120)]) + output_path = tmp_path / "scenes.otio" + write_scene_list_otio( + output_path=output_path, + scene_list=scenes, + video_path=tmp_path / "clip.mp4", + frame_rate=_FPS_NTSC, + name="my-timeline", + ) + + doc = json.loads(output_path.read_text()) + assert doc["OTIO_SCHEMA"] == "Timeline.1" + assert doc["name"] == "my-timeline" + assert doc["global_start_time"]["rate"] == pytest.approx(float(_FPS_NTSC)) + + tracks = doc["tracks"]["children"] + # Default `audio=True` yields both a video and an audio track. 
+ assert [t["kind"] for t in tracks] == ["Video", "Audio"] + for track in tracks: + assert len(track["children"]) == len(scenes) + for clip in track["children"]: + assert clip["OTIO_SCHEMA"] == "Clip.2" + ref = clip["media_references"]["DEFAULT_MEDIA"] + assert ref["OTIO_SCHEMA"] == "ExternalReference.1" + assert Path(ref["target_url"]).is_absolute() + + +def test_write_scene_list_otio_no_audio(tmp_path: Path): + """`audio=False` omits the audio track.""" + scenes = _fake_scenes(_FPS_NTSC, [(0, 24)]) + output_path = tmp_path / "scenes.otio" + write_scene_list_otio( + output_path=output_path, + scene_list=scenes, + video_path=tmp_path / "clip.mp4", + frame_rate=_FPS_NTSC, + audio=False, + ) + doc = json.loads(output_path.read_text()) + tracks = doc["tracks"]["children"] + assert [t["kind"] for t in tracks] == ["Video"] + + +def test_write_scene_list_otio_rational_time_precision(tmp_path: Path): + """Serialized frame-count values must be free of sub-10µs float drift (cf. 914ca31).""" + # Frames on integer-frame boundaries under NTSC 24000/1001: seconds * 23.976... + # should land on integers but floats can produce values like 214.00001 without + # the explicit round(..., 6) in the writer. 
+ scenes = _fake_scenes( + _FPS_NTSC, + [(start, start + 24) for start in (0, 24, 48, 96, 120)], + ) + output_path = tmp_path / "scenes.otio" + write_scene_list_otio( + output_path=output_path, + scene_list=scenes, + video_path=tmp_path / "clip.mp4", + frame_rate=_FPS_NTSC, + ) + doc = json.loads(output_path.read_text()) + for track in doc["tracks"]["children"]: + for clip in track["children"]: + for key in ("start_time", "duration"): + value = clip["source_range"][key]["value"] + assert value == round(value, 6), f"value {value!r} carries sub-10µs float drift" diff --git a/tests/test_vfr.py b/tests/test_vfr.py index c98d80e5..5b0ad57f 100644 --- a/tests/test_vfr.py +++ b/tests/test_vfr.py @@ -381,9 +381,9 @@ def test_vfr_edl_export(test_vfr_video: str, tmp_path): assert "001 AX V" in content -@pytest.mark.parametrize("xml_format", ["fcpx", "fcp"]) -def test_vfr_xml_export(test_vfr_video: str, xml_format: str, tmp_path): - """`save-xml` should succeed on VFR video and produce well-formed output in either dialect.""" +@pytest.mark.parametrize("fcp_format", ["fcpx", "fcp7"]) +def test_vfr_fcp_export(test_vfr_video: str, fcp_format: str, tmp_path): + """`save-fcp` should succeed on VFR video and produce well-formed output in either dialect.""" from xml.etree import ElementTree exit_code, _ = invoke_cli( @@ -396,15 +396,15 @@ def test_vfr_xml_export(test_vfr_video: str, xml_format: str, tmp_path): "time", "--end", "10s", - "save-xml", + "save-fcp", "--format", - xml_format, + fcp_format, ] ) assert exit_code == 0 xml_path = next(tmp_path.glob("*.xml")) root = ElementTree.parse(xml_path).getroot() - assert root.tag == ("fcpxml" if xml_format == "fcpx" else "xmeml") + assert root.tag == ("fcpxml" if fcp_format == "fcpx" else "xmeml") def test_vfr_csv_backend_conformance(test_vfr_video: str): diff --git a/website/pages/changelog.md b/website/pages/changelog.md index 96af64d8..6f6acf7f 100644 --- a/website/pages/changelog.md +++ b/website/pages/changelog.md @@ -676,7 +676,7 
@@ Although there have been minimal changes to most API examples, there are several ### CLI Changes - [feature] VFR videos are handled correctly by the OpenCV and PyAV backends, and should work correctly with default parameters -- [feature] New `save-xml` command allows exporting in Final Cut Pro format (FCP7/FCPX) [#156](https://github.com/Breakthrough/PySceneDetect/issues/156) +- [feature] New `save-fcp` command allows exporting in Final Cut Pro format (FCP7/FCPX) [#156](https://github.com/Breakthrough/PySceneDetect/issues/156) - [feature] `--min-scene-len`/`-m` and `save-images --frame-margin`/`-m` now accept seconds (e.g. `0.6s`) and timecodes (e.g. `00:00:00.600`) in addition to a frame count [#531](https://github.com/Breakthrough/PySceneDetect/issues/531) - [bugfix] Fix floating-point precision error in `save-otio` output where frame values near integer boundaries (e.g. `90.00000000000001`) were serialized with spurious precision - [bugfix] Add mitigation for transient `OSError` in the MoviePy backend as it is susceptible to subprocess pipe races on slow or heavily loaded systems [#496](https://github.com/Breakthrough/PySceneDetect/issues/496) @@ -686,6 +686,7 @@ Although there have been minimal changes to most API examples, there are several **VFR & Timestamp Overhaul:** + * Add `write_scene_list_edl`, `write_scene_list_fcpx`, `write_scene_list_fcp7`, and `write_scene_list_otio` to the `scenedetect.output` module so `save-edl`, `save-fcp`, and `save-otio` can be invoked directly from Python (previously CLI-only) * Add new `Timecode` type to represent frame timings in terms of the video's source timebase * Add `time_base` and `pts` properties to `FrameTimecode` for more accurate timing information * All backends (PyAV, OpenCV, MoviePy) now return PTS-backed timestamps from `VideoStream.position` From 5832f9f3364a87e4a50c9e294f0eb19d6be687fc Mon Sep 17 00:00:00 2001 From: Breakthrough Date: Wed, 22 Apr 2026 22:40:09 -0400 Subject: [PATCH 3/3] [docs] Regenerate 
CLI docs --- docs/cli.rst | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/docs/cli.rst b/docs/cli.rst index 6df3d9f7..cca81f67 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -178,8 +178,6 @@ Options Default: ``15.0`` - - .. option:: -f VAL, --frame-window VAL Size of window to detect deviations from mean. Represents how many frames before/after the current one to use for mean. @@ -552,6 +550,38 @@ Options Output directory to save EDL file to. Overrides global option :option:`-o/--output `. +.. _command-save-fcp: + +.. program:: scenedetect save-fcp + + +``save-fcp`` +======================================================================== + +Save cuts in Final Cut Pro XML format (FCP7 xmeml or FCPX). + + +Options +------------------------------------------------------------------------ + + +.. option:: -f NAME, --filename NAME + + Filename format to use. + + Default: ``$VIDEO_NAME.xml`` + +.. option:: --format TYPE + + Format to export. TYPE must be one of: fcpx, fcp7. + + Default: ``fcpx`` + +.. option:: -o DIR, --output DIR + + Output directory to save XML file to. Overrides global option :option:`-o/--output `. + + .. _command-save-html: .. program:: scenedetect save-html @@ -658,11 +688,11 @@ Options Default: ``3`` -.. option:: -m N, --frame-margin N +.. option:: -m DURATION, --frame-margin DURATION - Number of frames to ignore at beginning/end of scenes when saving images. Controls temporal padding on scene boundaries. + Padding around the beginning/end of each scene used when selecting which frames to extract. DURATION can be specified in frames (-m 1), in seconds with `s` suffix (-m 0.1s), or timecode (-m 00:00:00.100). - Default: ``3`` + Default: ``1`` .. option:: -s S, --scale S