In [1]:
from openai import OpenAI
from dotenv import load_dotenv
import os
from IPython.display import Markdown, display

# Read key/value pairs from a .env file into the process environment.
# override=True lets values from the file replace variables that are already set.
load_dotenv(override=True)

True

In [3]:
# Connection settings come from the environment (populated by load_dotenv above).
# A variable that is not set yields None; nothing here checks for that.
openrouter_api_key, openrouter_base_url, deepseek_model_name = (
    os.environ.get(variable_name)
    for variable_name in (
        "OPENROUTER_API_KEY",
        "OPENROUTER_BASE_URL",
        "DEEPSEEK_MODEL_NAME",
    )
)

In [40]:
# System prompt for the manim-code generation request: it asks for an
# error-free script of 1 to 2 minutes, with inline timing comments, returned
# as nothing but a single fenced python block.
manim_agent_system_prompt = """
You are a professional python manim code writer your task is to get the user's prompt and
generate the manim code for the topic the user wants to learn, your manim script is perfect in
such a way that the animations, visualizations and simulations and teachings will be understood 
even for a layman who watches it.
Note: The manim script when converted to video must take 1 to 2 minutes of duration and ensure the video must be super easy for anyone to be engaged and understand the topic
additionally also add inline comments for the expected time to complete for each animations
and the manim script must not contain any errors at all!
the output format of your manim code must be exactly the same as below format!, no explanation at all only the code must be the output!

```python
[your manim script here...]
```
"""

# OpenAI-compatible client configured with the key and base URL read from
# OPENROUTER_API_KEY / OPENROUTER_BASE_URL.
manim_agent = OpenAI(api_key=openrouter_api_key, base_url=openrouter_base_url)

In [41]:
# Topic the generated animation should teach; sent as the user message.
content = "derive the formula for the circumference of the circle"

# One chat completion: the system prompt fixes the output format, the user
# message carries the topic.
response = manim_agent.chat.completions.create(
    model=deepseek_model_name,
    messages=[
        dict(role="system", content=manim_agent_system_prompt),
        dict(role="user", content=content),
    ],
)

In [42]:
# Text of the first returned choice. As the printed output shows, it still
# carries the ```python fence requested by the system prompt; the same string
# is later sent unchanged as the user message of the transcript request.
code = response.choices[0].message.content

print(code)

```python
from manim import *

class CircumferenceDerivation(Scene):
    def construct(self):
        # Set background color for better visibility
        self.camera.background_color = "#1e1e1e"
        
        # Title for the video
        title = Tex("Deriving the Circumference Formula", font_size=48)
        title.to_edge(UP)
        self.play(Write(title), run_time=1.5)
        self.wait(1)
        
        # Create circle with radius (4 seconds)
        circle = Circle(radius=2, color=BLUE_B)
        circle_center = Dot(color=RED).move_to(circle.get_center())
        radius_line = Line(circle.get_center(), circle.get_bottom(), color=YELLOW)
        radius_label = MathTex("r", color=YELLOW).next_to(radius_line, LEFT)
        
        self.play(Create(circle), run_time=2)
        self.play(FadeIn(circle_center), run_time=0.5)
        self.play(Create(radius_line), Write(radius_label), run_time=1.5)
        self.wait(1)
        
        # Show circumference diameter (5 seconds)
   

In [43]:
# System prompt for the transcript-writing request.
# The format example is deliberately valid JSON apart from the
# <duration in seconds> placeholders: double-quoted keys and strings, no
# inline comment, no trailing comma. A reply that copies the example literally
# can therefore be parsed with json.loads. The explanation of pause entries
# lives in a note after the fenced example rather than inside it.
transcript_agent_system_prompt = """
You are a professional transcript writer for a educational manim script in python, 
your task is to understand the manim code block by block and then write dialogues that's 
best suitable like a lecture for that education video standpoint, the manim code also contains information 
regarding the time each code segments or animation takes so use that information to fill the dialogue in suitable size that fits within the estimated time

Now give the output exactly as the below JSON format only, with no explanation or other things only the trancsript as JSON

```json
[
    {"dialogue": "<the dialogue for the scene goes here...>", "duration": <duration in seconds>},
    {"dialogue": "<the dialogue for the scene goes here...>", "duration": <duration in seconds>},
    {"dialogue": "<the dialogue for the scene goes here...>", "duration": <duration in seconds>},
    {"dialogue": "", "duration": <duration in seconds>},
    {"dialogue": "<the dialogue for the scene goes here...>", "duration": <duration in seconds>}
]
```

Note: the list may contain as many entries as the manim script needs. An entry whose dialogue is the empty string "" is a pause block: that part is just silent for `duration` seconds (use it only if the manim code has a pause block or a wait/sleep time related thing). Do not put comments inside the JSON.
"""

# Separate client for the transcript request, built from the same API key and
# base URL as manim_agent.
transcript_agent = OpenAI(api_key=openrouter_api_key, base_url=openrouter_base_url)

In [45]:
# Ask for the narration: the user message is the generated manim script text
# held in `code`. Note that this rebinds `response`, replacing the earlier
# manim completion.
response = transcript_agent.chat.completions.create(
    model=deepseek_model_name,
    messages=[
        dict(role="system", content=transcript_agent_system_prompt),
        dict(role="user", content=code),
    ],
)

In [46]:
# Reply text of the first choice; the printed output shows it wrapped in a
# ```json fence.
# NOTE(review): transcript_to_audio indexes each segment as a dict, so this
# text presumably has to be un-fenced and parsed (e.g. json.loads) before it
# is passed on; no such step is visible in this part of the notebook.
transcript = response.choices[0].message.content

print(transcript)

```json
[
    {"dialogue": "In this session, we will derive the formula for the circumference of a circle.", "duration": 1.5},
    {"dialogue": "", "duration": 1},
    {"dialogue": "We begin by drawing a circle.", "duration": 2},
    {"dialogue": "Mark its center point.", "duration": 0.5},
    {"dialogue": "The radius, labeled 'r,' connects the center to the perimeter.", "duration": 1.5},
    {"dialogue": "", "duration": 1},
    {"dialogue": "The diameter spans the circle passing through the center, so d equals 2r.", "duration": 2},
    {"dialogue": "", "duration": 3},
    {"dialogue": "Now, let's unroll the circle to visualize its circumference.", "duration": 1},
    {"dialogue": "Imagine a point moving around the circle, tracing its full perimeter.", "duration": 4},
    {"dialogue": "This trace straightens into a line representing the circumference.", "duration": 3},
    {"dialogue": "We label this length 'C' for circumference.", "duration": 1},
    {"dialogue": "", "duration": 1},
 

In [33]:
import pyttsx3
import tempfile
import os
from pydub import AudioSegment

def transcript_to_audio(transcript, output_file="final_audio.mp3", voice=None, rate=180):
    """
    Generate a merged audio file from transcript JSON using pyttsx3.

    Every segment with non-blank dialogue is synthesised to a temporary WAV
    file, loaded with pydub and then padded with silence or cut so that it
    lasts exactly the segment's ``duration``. Segments whose dialogue is
    blank, ``None`` or missing become pure silence of that length. The pieces
    are concatenated in order and exported to ``output_file``.

    transcript format:
    [
        {"dialogue": "some text", "duration": 3.5},
        {"dialogue": "", "duration": 1.0},  # silent pause
        ...
    ]

    Args:
        transcript: iterable of dict segments as shown above; ``duration`` is
            in seconds.
        output_file: path the merged audio is written to. The export always
            uses the "mp3" format, whatever the file extension.
        voice: optional value for the engine's "voice" property; the engine
            default is kept when this is falsy.
        rate: value set on the engine's "rate" property.

    Returns:
        ``output_file``, unchanged.

    Raises:
        ValueError: if a segment has a negative duration.
    """
    engine = pyttsx3.init()
    if voice:
        engine.setProperty("voice", voice)
    engine.setProperty("rate", rate)

    final_audio = AudioSegment.silent(duration=0)

    for index, seg in enumerate(transcript):
        # sec -> ms; round instead of truncating so that float artefacts in
        # the multiplication cannot drop a millisecond.
        target_duration = round(seg["duration"] * 1000)
        if target_duration < 0:
            raise ValueError(
                f"transcript segment {index}: duration must be >= 0, "
                f"got {seg['duration']!r}"
            )

        # A missing or None dialogue is treated like "" (a pause).
        text = seg.get("dialogue") or ""
        if not text.strip():
            # Silent pause only
            final_audio += AudioSegment.silent(duration=target_duration)
            continue

        # Reserve a path and close the handle right away so the TTS engine
        # can write to it; delete=False means cleanup is our job.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tf:
            tmp_path = tf.name
        try:
            engine.save_to_file(text, tmp_path)
            engine.runAndWait()
            speech = AudioSegment.from_file(tmp_path, format="wav")
        finally:
            # Always remove the temp file, even when synthesis or decoding fails.
            try:
                os.remove(tmp_path)
            except FileNotFoundError:
                pass

        # Adjust to match target duration: pad short speech, cut long speech.
        speech_duration = len(speech)
        if speech_duration < target_duration:
            speech += AudioSegment.silent(duration=target_duration - speech_duration)
        else:
            speech = speech[:target_duration]

        final_audio += speech

    # Export final merged audio
    final_audio.export(output_file, format="mp3")
    return output_file

