diff --git a/manim_voiceover/__init__.py b/manim_voiceover/__init__.py
index d2bd2f1..7ef8f60 100644
--- a/manim_voiceover/__init__.py
+++ b/manim_voiceover/__init__.py
@@ -4,3 +4,4 @@
 import pkg_resources
 
 __version__: str = pkg_resources.get_distribution(__name__).version
+
diff --git a/manim_voiceover/helper.py b/manim_voiceover/helper.py
index 81721bb..e38c444 100644
--- a/manim_voiceover/helper.py
+++ b/manim_voiceover/helper.py
@@ -8,7 +8,25 @@
 import textwrap
 from pydub import AudioSegment
 from pathlib import Path
-from manim import logger
+
+import pkg_resources
+
+# Detected Manim flavour: "manimce" when the ``manim`` distribution is
+# installed, "manimgl" otherwise. Sibling modules import this flag.
+__manimtype__: str = "manimce"
+
+try:
+    pkg_resources.get_distribution("manim")
+    __manimtype__ = "manimce"
+except pkg_resources.DistributionNotFound:
+    __manimtype__ = "manimgl"
+
+if __manimtype__ == "manimce":
+    from manim import logger
+else:
+    # ``manimlib.logger`` is a module; the logger object it defines is ``log``.
+    from manimlib.logger import log as logger
 
 
 def chunks(lst: list, n: int):
diff --git a/manim_voiceover/services/azure.py b/manim_voiceover/services/azure.py
index aa61bf8..cbd7b03 100644
--- a/manim_voiceover/services/azure.py
+++ b/manim_voiceover/services/azure.py
@@ -7,7 +7,12 @@
     prompt_ask_missing_extras,
     remove_bookmarks,
 )
-from manim import logger
+from manim_voiceover.helper import __manimtype__
+
+if __manimtype__ == "manimce":
+    from manim import logger
+else:
+    from manimlib.logger import log as logger
 
 try:
     import azure.cognitiveservices.speech as speechsdk
diff --git a/manim_voiceover/services/base.py b/manim_voiceover/services/base.py
index bc78195..002f9b1 100644
--- a/manim_voiceover/services/base.py
+++ b/manim_voiceover/services/base.py
@@ -5,7 +5,14 @@
 import sys
 import hashlib
 from pathlib import Path
-from manim import config, logger
+from manim_voiceover.helper import __manimtype__
+
+if __manimtype__ == "manimce":
+    from manim import config, logger
+else:
+    from manimlib import config
+    from manimlib.logger import log as logger
+
 from slugify import slugify
 from manim_voiceover.defaults import (
     DEFAULT_VOICEOVER_CACHE_DIR,
@@ -72,7 +79,12 @@ def __init__(
         if cache_dir is not None:
             self.cache_dir = cache_dir
         else:
-            self.cache_dir = Path(config.media_dir) / DEFAULT_VOICEOVER_CACHE_DIR
+            # Manim CE and ManimGL expose the output directory differently.
+            if __manimtype__ == "manimce":
+                media_dir = config.media_dir
+            else:
+                media_dir = config.get_custom_config()["directories"]["output"]
+            self.cache_dir = Path(media_dir) / DEFAULT_VOICEOVER_CACHE_DIR
 
         if not os.path.exists(self.cache_dir):
             os.makedirs(self.cache_dir)
diff --git a/manim_voiceover/services/coqui.py b/manim_voiceover/services/coqui.py
index 2747663..c43c039 100644
--- a/manim_voiceover/services/coqui.py
+++ b/manim_voiceover/services/coqui.py
@@ -1,6 +1,12 @@
 from pathlib import Path
 
-from manim import logger
+from manim_voiceover.helper import __manimtype__
+
+if __manimtype__ == "manimce":
+    from manim import logger
+else:
+    from manimlib.logger import log as logger
+
 from manim_voiceover.helper import prompt_ask_missing_package, remove_bookmarks, wav2mp3
 from manim_voiceover.services.base import SpeechService
 
diff --git a/manim_voiceover/services/edge.py b/manim_voiceover/services/edge.py
new file mode 100644
index 0000000..6c08643
--- /dev/null
+++ b/manim_voiceover/services/edge.py
@@ -0,0 +1,160 @@
+"""Speech service backed by Microsoft Edge's online text-to-speech (edge-tts)."""
+from manim_voiceover.helper import __manimtype__
+from manim_voiceover.services.base import SpeechService
+import edge_tts
+
+if __manimtype__ == "manimce":
+    from manim import logger
+else:
+    # ``manimlib.logger`` is a module; the logger object it defines is ``log``.
+    from manimlib.logger import log as logger
+
+from manim_voiceover.helper import (
+    remove_bookmarks,
+)
+from pathlib import Path
+import asyncio
+
+# NOTE(review): edge-tts reports offsets and durations in 100-nanosecond
+# ticks according to its documentation -- confirm for the pinned version.
+TICKS_PER_MILLISECOND = 10_000
+
+
+def serialize_word_boundary(wb, text_offset=0):
+    """Converts an edge-tts ``WordBoundary`` chunk into a plain dictionary.
+
+    Args:
+        wb (dict): A ``WordBoundary`` chunk from ``Communicate.stream()``.
+        text_offset (int, optional): Position of the word in the synthesized
+            text. Defaults to 0.
+
+    Returns:
+        dict: The entry stored under ``word_boundaries`` for this word.
+    """
+    return {
+        # Stored exactly as reported by edge-tts (not converted).
+        "audio_offset": wb["offset"],
+        "duration_milliseconds": int(wb["duration"] / TICKS_PER_MILLISECOND),
+        "text_offset": text_offset,
+        "word_length": len(wb["text"]),
+        "text": wb["text"],
+        "boundary_type": wb["type"],
+    }
+
+
+async def get_voice_file(text, voice, out_file, wb=None) -> None:
+    """Synthesizes ``text`` and streams the audio into ``out_file``.
+
+    Args:
+        text (str): The text to synthesize.
+        voice (str): Name of the edge-tts voice.
+        out_file (str): Path of the audio file to write.
+        wb (list, optional): List that receives every ``WordBoundary`` chunk.
+    """
+    # Create the list per call; a mutable default is shared between calls.
+    if wb is None:
+        wb = []
+    communicate = edge_tts.Communicate(text, voice)
+    with open(out_file, "wb") as file:
+        async for chunk in communicate.stream():
+            if chunk["type"] == "audio":
+                file.write(chunk["data"])
+            elif chunk["type"] == "WordBoundary":
+                wb.append(chunk)
+                logger.debug("WordBoundary: %s", chunk)
+
+
+class EdgeService(SpeechService):
+    """Speech service that synthesizes voiceovers with edge-tts."""
+
+    def __init__(
+        self,
+        voice: str = "zh-CN-XiaoxiaoNeural",
+        style: str = None,
+        output_format: str = "Audio48Khz192KBitRateMonoMp3",
+        prosody: dict = None,
+        **kwargs,
+    ):
+        """
+        Args:
+            voice (str, optional): Name of the edge-tts voice.
+                Defaults to ``"zh-CN-XiaoxiaoNeural"``.
+            style (str, optional): Recorded in ``input_data`` only.
+            output_format (str, optional): Recorded in ``input_data`` only.
+            prosody (dict, optional): Recorded in ``input_data`` only.
+            **kwargs: Forwarded to ``SpeechService.__init__``.
+        """
+        self.voice = voice
+        self.style = style
+        self.output_format = output_format
+        self.prosody = prosody
+        SpeechService.__init__(self, **kwargs)
+
+    def generate_from_text(
+        self, text: str, cache_dir: str = None, path: str = None, **kwargs
+    ) -> dict:
+        """Synthesizes ``text`` and returns the data describing the audio.
+
+        Args:
+            text (str): The text to speak; it may contain bookmark tags.
+            cache_dir (str, optional): Directory that receives the audio
+                file. Defaults to ``self.cache_dir``.
+            path (str, optional): Audio file name inside ``cache_dir``.
+                Defaults to a name derived from the input data.
+
+        Returns:
+            dict: The input text, the input data, the audio file name and
+            the word boundaries.
+        """
+        # Bookmark tags must not be read aloud, so they are stripped first.
+        input_text = remove_bookmarks(text)
+        if cache_dir is None:
+            cache_dir = self.cache_dir
+
+        input_data = {
+            "input_text": text,
+            "service": "edge",
+            "config": {
+                "voice": self.voice,
+                "style": self.style,
+                "output_format": self.output_format,
+                "prosody": self.prosody,
+            },
+        }
+
+        cached_result = self.get_cached_result(input_data, cache_dir)
+        if cached_result is not None:
+            return cached_result
+
+        if path is None:
+            audio_path = self.get_audio_basename(input_data) + ".mp3"
+        else:
+            audio_path = path
+
+        raw_boundaries = []
+        asyncio.run(
+            get_voice_file(
+                input_text,
+                self.voice,
+                str(Path(cache_dir) / audio_path),
+                raw_boundaries,
+            )
+        )
+
+        word_boundaries = []
+        cursor = 0
+        for wb in raw_boundaries:
+            # Find the word in the spoken text so that the offset also counts
+            # the whitespace and punctuation between words.
+            found = input_text.find(wb["text"], cursor)
+            if found != -1:
+                cursor = found
+            word_boundaries.append(serialize_word_boundary(wb, cursor))
+            cursor += len(wb["text"])
+
+        return {
+            "input_text": text,
+            "input_data": input_data,
+            "original_audio": audio_path,
+            "word_boundaries": word_boundaries,
+        }
diff --git a/manim_voiceover/services/gtts.py b/manim_voiceover/services/gtts.py
index a38c532..81fe54b 100644
--- a/manim_voiceover/services/gtts.py
+++ b/manim_voiceover/services/gtts.py
@@ -1,5 +1,11 @@
 from pathlib import Path
-from manim import logger
+from manim_voiceover.helper import __manimtype__
+
+if __manimtype__ == "manimce":
+    from manim import logger
+else:
+    from manimlib.logger import log as logger
+
 from manim_voiceover.helper import prompt_ask_missing_extras, remove_bookmarks
 
 try:
diff --git a/manim_voiceover/services/pyttsx3.py b/manim_voiceover/services/pyttsx3.py
index 0dfa803..c65711d 100644
--- a/manim_voiceover/services/pyttsx3.py
+++ b/manim_voiceover/services/pyttsx3.py
@@ -1,5 +1,11 @@
 from pathlib import Path
-from manim import logger
+from manim_voiceover.helper import __manimtype__
+
+if __manimtype__ == "manimce":
+    from manim import logger
+else:
+    from manimlib.logger import log as logger
+
 from manim_voiceover.helper import prompt_ask_missing_extras
 
 try:
diff --git a/manim_voiceover/services/recorder/__init__.py b/manim_voiceover/services/recorder/__init__.py
index a7b7b4e..8d4b0e8 100644
--- a/manim_voiceover/services/recorder/__init__.py
+++ b/manim_voiceover/services/recorder/__init__.py
@@ -1,9 +1,13 @@
 from pathlib import Path
+from manim_voiceover.helper import __manimtype__
 
 from manim_voiceover.helper import msg_box, prompt_ask_missing_extras, remove_bookmarks
 from manim_voiceover.services.base import SpeechService
 
-from manim import logger
+if __manimtype__ == "manimce":
+    from manim import logger
+else:
+    from manimlib.logger import log as logger
 try:
     import pyaudio
     from manim_voiceover.services.recorder.utility import Recorder
diff --git a/manim_voiceover/services/recorder/utility.py b/manim_voiceover/services/recorder/utility.py
index 509ab0c..6b90098 100644
--- a/manim_voiceover/services/recorder/utility.py
+++ b/manim_voiceover/services/recorder/utility.py
@@ -5,7 +5,11 @@
 import sched
 from pathlib import Path
 from pydub import AudioSegment
-from manim import logger
+from manim_voiceover.helper import __manimtype__
+if __manimtype__ == "manimce":
+    from manim import logger
+else:
+    from manimlib.logger import log as logger
 from manim_voiceover.helper import trim_silence, wav2mp3
 
 
diff --git a/manim_voiceover/tracker.py b/manim_voiceover/tracker.py
index 7cece45..3769aa3 100644
--- a/manim_voiceover/tracker.py
+++ b/manim_voiceover/tracker.py
@@ -1,12 +1,18 @@
 from pathlib import Path
 import re
 import numpy as np
-from manim import logger
+from manim_voiceover.helper import __manimtype__
+
+if __manimtype__ == "manimce":
+    from manim import logger
+    from manim import Scene
+else:
+    from manimlib.logger import log as logger
+    from manimlib import Scene
 
 from typing import Optional, List
 from scipy.interpolate import interp1d
 
-from manim import Scene
 from manim_voiceover.modify_audio import get_duration
 from manim_voiceover.helper import remove_bookmarks
 
@@ -48,7 +54,10 @@ def __init__(self, scene: Scene, data: dict, cache_dir: str):
         self.cache_dir = cache_dir
         self.duration = get_duration(Path(cache_dir) / self.data["final_audio"])
         # last_t = scene.last_t
-        last_t = scene.renderer.time
+        if __manimtype__ == "manimce":
+            last_t = scene.renderer.time
+        else:
+            last_t = scene.time
         if last_t is None:
             last_t = 0
         self.start_t = last_t
@@ -97,7 +106,10 @@ def get_remaining_duration(self, buff: float = 0.0) -> float:
             int: The remaining duration of the voiceover in seconds.
         """
         # result= max(self.end_t - self.scene.last_t, 0)
-        result = max(self.end_t - self.scene.renderer.time + buff, 0)
+        if __manimtype__ == "manimce":
+            result = max(self.end_t - self.scene.renderer.time + buff, 0)
+        else:
+            result = max(self.end_t - self.scene.time + buff, 0)
         # print(result)
         return result
 
@@ -128,7 +140,10 @@ def time_until_bookmark(
         self._check_bookmarks()
         if not mark in self.bookmark_times:
             raise Exception("There is no " % mark)
-        result = max(self.bookmark_times[mark] - self.scene.renderer.time + buff, 0)
+        if __manimtype__ == "manimce":
+            result = max(self.bookmark_times[mark] - self.scene.renderer.time + buff, 0)
+        else:
+            result = max(self.bookmark_times[mark] - self.scene.time + buff, 0)
         if limit is not None:
             result = min(limit, result)
         return result
diff --git a/manim_voiceover/translate/gettext_utils.py b/manim_voiceover/translate/gettext_utils.py
index c39a0de..4408cf8 100644
--- a/manim_voiceover/translate/gettext_utils.py
+++ b/manim_voiceover/translate/gettext_utils.py
@@ -2,7 +2,12 @@
 import os
 import typing as t
 
-from manim import logger
+from manim_voiceover.helper import __manimtype__
+
+if __manimtype__ == "manimce":
+    from manim import logger
+else:
+    from manimlib.logger import log as logger
 from manim_voiceover.helper import prompt_ask_missing_extras
 
 
diff --git a/manim_voiceover/voiceover_scene.py b/manim_voiceover/voiceover_scene.py
index 16524f2..54a540c 100644
--- a/manim_voiceover/voiceover_scene.py
+++ b/manim_voiceover/voiceover_scene.py
@@ -5,7 +5,14 @@
 import re
 import typing as t
 
-from manim import Scene, config
+
+from manim_voiceover.helper import __manimtype__
+
+if __manimtype__ == "manimce":
+    from manim import Scene, config
+else:
+    from manimlib import Scene
+
 from manim_voiceover.services.base import SpeechService
 from manim_voiceover.tracker import VoiceoverTracker
 from manim_voiceover.helper import chunks, remove_bookmarks
@@ -82,6 +89,96 @@ def add_voiceover_text(
             )
 
         return tracker
+
+    def write_subcaption_file(self):
+        """Writes the subcaption file.
+
+        With Manim CE this delegates to the renderer's file writer. Otherwise
+        the collected subcaptions are written as an ``.srt`` file into the
+        ``videos`` folder below the file writer's output directory.
+        """
+        if __manimtype__ == "manimce":
+            # Calling ``self.write_subcaption_file()`` here would recurse
+            # forever; the file writer owns the Manim CE implementation.
+            self.renderer.file_writer.write_subcaption_file()
+            return
+
+        from manimlib.logger import log
+        import srt
+
+        writer = self.file_writer
+        scene_name = writer.file_name or writer.get_default_scene_name()
+        # An empty output directory means the current working directory.
+        videos_dir = Path(writer.output_directory or ".") / "videos"
+        videos_dir.mkdir(parents=True, exist_ok=True)
+        subcaption_file = (videos_dir / scene_name).with_suffix(".srt")
+        subcaption_file.write_text(
+            srt.compose(getattr(self, "subcaptions", [])), encoding="utf-8"
+        )
+        log.info(f"Subcaption file has been written as {subcaption_file}")
+
+    def add_subcaption_compatible(
+        self, content: str, duration: float = 1, offset: float = 0
+    ) -> None:
+        r"""Adds an entry in the corresponding subcaption file
+        at the current time stamp.
+
+        The current time stamp is obtained from ``Scene.renderer.time``
+        with Manim CE and from ``Scene.time`` otherwise.
+
+        Parameters
+        ----------
+
+        content
+            The subcaption content.
+        duration
+            The duration (in seconds) for which the subcaption is shown.
+        offset
+            This offset (in seconds) is added to the starting time stamp
+            of the subcaption.
+
+        Examples
+        --------
+
+        This example illustrates both possibilities for adding
+        subcaptions to Manimations::
+
+            class SubcaptionExample(Scene):
+                def construct(self):
+                    square = Square()
+                    circle = Circle()
+
+                    # first option: via the add_subcaption_compatible method
+                    self.add_subcaption_compatible("Hello square!", duration=1)
+                    self.play(Create(square))
+
+                    # second option: within the call to Scene.play
+                    self.play(
+                        Transform(square, circle),
+                        subcaption="The square transforms."
+                    )
+
+        """
+        if __manimtype__ == "manimce":
+            self.add_subcaption(content, duration, offset)
+            return
+
+        import srt
+        import datetime
+
+        # The list may not exist yet (no visible code creates it); create it lazily.
+        if not hasattr(self, "subcaptions"):
+            self.subcaptions = []
+
+        subtitle = srt.Subtitle(
+            index=len(self.subcaptions),
+            content=content,
+            start=datetime.timedelta(seconds=self.time + offset),
+            end=datetime.timedelta(seconds=self.time + offset + duration),
+        )
+        self.subcaptions.append(subtitle)
+        # Rewrite the file after every entry so that it is always up to date.
+        self.write_subcaption_file()
 
     def add_wrapped_subcaption(
         self,
@@ -118,7 +215,7 @@ def add_wrapped_subcaption(
         current_offset = 0
         for idx, subcaption in enumerate(subcaptions):
             chunk_duration = duration * subcaption_weights[idx]
-            self.add_subcaption(
+            self.add_subcaption_compatible(
                 subcaption,
                 duration=max(chunk_duration - subcaption_buff, 0),
                 offset=current_offset,
@@ -150,7 +247,13 @@ def safe_wait(self, duration: float) -> None:
         Args:
             duration (float): The duration to wait for in seconds.
         """
-        if duration > 1 / config["frame_rate"]:
+
+        if __manimtype__ == "manimce":
+            frame_rate = config["frame_rate"]
+        else:
+            frame_rate = self.camera.fps
+
+        if duration > 1 / frame_rate:
             self.wait(duration)
 
     def wait_until_bookmark(self, mark: str) -> None: