In [4]:
from director import script
from image_generation import image_generator
from audio_generation import generate_audio_clips
import json
import os
import requests
import subprocess  # For running ffmpeg command
from datetime import datetime
from moviepy.editor import TextClip, CompositeVideoClip, ImageClip, concatenate_videoclips

url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest:generateContent?key=AIzaSyC8tc7m4TkAmfOx9cu_bckCc62ZgVDzSBQ"

# Prompting the user for input
context = input("Enter the context along with the Item name: ")


# this would be some pre research.
reqdata = {
    "item": f"{context}",
    "context": "Provide some context or specific detail you want to focus on",
    "origin": "Discuss the geographic and historical origin of the item",
    "inventor": "Identify the inventor(s) or creator(s) if applicable",
    "initial_adoption": "Describe how and where it was first adopted or popularized",
    "manufacturing_process": "Explain how the item is made or produced",
    "cultural_impact": "Analyze the cultural significance or impact of the item",
    "technological_influence": "Discuss any technological influence or advancements due to the item",
    "economic_importance": "Reflect on the economic relevance of the item, including market impact",
    "environmental_impact": "Address any environmental considerations or impacts",
    "legal_aspects": "Mention any notable legal battles or patent issues related to the item",
    "modern_day_usage": "Describe current uses and applications",
    "evolution_over_time": "Trace the evolution or changes in design, function, or popularity over time",
    "future_prospects": "Speculate on future developments or directions for the item"
}

# Predefined JSON structure
expected_format = {
    "title": "suggest a title for the video based on the item",
    "segments": [
        {
            "start_time": 0,
            "end_time": 6,
            "text_1": "13 word only",
            "visual_prompt_1": "verbose"
        },
        {
            "start_time": 7,
            "end_time": 12,
            "text_2": "13 word only",
            "visual_prompt_2":  "verbose"
        },
        {
            "start_time": "n",
            "end_time": "12",
            "text_2": "13 word only",
            "visual_prompt_2":  "verbose"
        },
        {
            "start_time": 49,
            "end_time": 54,
            "text_9": "13 word only",
            "visual_prompt_9":  "verbose"
        }
    ]
}

modelbrief = {
    "model_name": "DALL-E 2",
    "description": "An advanced AI image generation model from OpenAI that creates detailed and realistic images from textual descriptions. It is more of an image engineer than an artist.",
    "strengths": [
        "Produces high-quality, photorealistic images, often surpassing the detail and style requested.",
        "Capable of complex image generation, combining elements creatively in new ways which feel almost like human intelligence.",
    ],
    "weaknesses": [
        "Complex or abstract concepts can sometimes challenge the model, leading to unpredictable or less accurate renditions.",
        "While it has safety protocols to prevent generating inappropriate content, these can also limit the scope of creative outputs in sensitive areas.",
        "absolutely horrible at generating graphical data or technical data or even repeating any kind of text",
    ],
    "limitations": [
        "Not suitable for creating accurate maps, graphs, or other specific informational graphics.",
        "May not always adhere to strict historical or factual accuracy without highly detailed prompts.",
        "Content generation is restricted to avoid generating harmful or misleading images.",
        "The need for careful prompt design to avoid unwanted or unexpected elements in the generated images."
    ]
}
modelbrief2 = {
    "model_name": "DALL-E-3",
    "description": "An advanced AI image generation model from OpenAI that creates highly detailed and realistic images from textual descriptions. It is more of an image engineer than an artist.",
    "strengths": [
        "very good at capturing details.",
        "be very specific and verbose it can also stick to themes and story lines so you can keep the visual text being returned verbose",
    ],
    "weaknesses": [
        "Complex or abstract concepts can sometimes challenge the model, leading to unpredictable or less accurate renditions.",
        "While it has safety protocols to prevent generating inappropriate content, these can also limit the scope of creative outputs in sensitive areas.",
        "absolutely horrible at generating graphical data or technical data or even repeating any kind of text",
    ],
    "limitations": [
        "Not suitable for generating explicit or potentially harmful content due to ethical guidelines and restrictions.",
        "May not always adhere to strict historical or factual accuracy without highly detailed prompts.",
        "The quality of results can vary based on the specificity and clarity of the prompts provided so"
    ]
}
modelbrief3 = {
    "model_name": "gTTS",
    "description": "Google Text-to-Speech (gTTS) is a Python library and CLI tool that interfaces with Google's Text-to-Speech API. It converts text into natural-sounding spoken audio. gTTS supports multiple languages and is commonly used in applications that require spoken feedback or audio content.",
    "strengths": [
        "Produces clear and natural-sounding voice outputs, enhancing user engagement and understanding."
    ],
    "weaknesses": [
        "slow at reciting so produce only about 13 words per 5 seconds",
        "Limited control over voice modulation and emotional intonation compared to more advanced speech synthesis tools.",
    ],
    "adaptations": [
        "Adapt the length of text to ensure compatibility with the audio duration specified in video segments.",
        "Generate succinct and clear audio segments that align with the visual content's timing and pacing."
    ],
    "limitations": [
        "Lacks advanced features like speech emotion or different speaking styles, which are available in more specialized TTS systems.",
        "While it supports many languages, the quality of speech generation can vary between them."
    ]
}

system_instructions = [
        "Output strictly valid JSON only. Ensure the text fields are concise to fit within the designated timestamps for audio. the timestampse are in seconds",
        "Maintain key integrity; ensure all fields are accurately populated.",
        "Ensure a high correlation between visual prompts and the narrative text to enhance content relevance and viewer engagement.",
        "STRICTLY output only 13 words per 5 seconds for text_n field",
        "follow the models strengths and weknesses carefully",
        "MAKE SURE THE VISUAL PROPT IS VERBOSE in visual_prompt_n"
    ]
# Joining the instructions into a single string with each instruction on a new line for clarity
system_prompt = "Please follow these instructions:\n" + "\n".join(system_instructions)
# Constructing user prompt
userprompt = f"strict instructions: {context}. Here's the expected_format: {json.dumps(expected_format)}. try to incorporate along the lines of : {reqdata}. visual_prompt_n using the model: {modelbrief} text_n using the model: {modelbrief3}."

response = script(system_prompt, userprompt, url)
content = json.dumps(response)
#print (content)

response_json = json.loads(response)# Store the title in a variable
title = response_json['title']
# Store the segments as a list
segments = response_json['segments']

# Output the stored data
print("Title:", title)
print("Segments:", json.dumps(segments, indent=4))

visual_prompts = [segment[f'visual_prompt_{i+1}'] for i, segment in enumerate(segments)]

# Generating images
images_urls = image_generator(visual_prompts)

print(images_urls)

# Creating a directory with the current timestamp
dir_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
print(dir_name)
os.makedirs(dir_name, exist_ok=True)

#download urls
for idx, url in enumerate(images_urls):
    response = requests.get(url)
    file_path = os.path.join(dir_name, f"image_{idx+1}.png")
    if response.status_code == 200:
        with open(file_path, 'wb') as file:
            file.write(response.content)
        print(f"Downloaded and saved: {file_path}")
    else:
        print(f"Failed to download image from {url}")

# Creating video clips from images
def create_video_clip(image_paths, fixed_duration=6):
    clips = [ImageClip(image).set_duration(fixed_duration) for image in image_paths]
    return concatenate_videoclips(clips, method="compose")


def add_subtitles(video, segments):
    composite_clips = [video]  # Start with the main video clip
    for idx, segment in enumerate(segments):
        text = segment.get(f'text_{idx+1}', '')  # Extract text
        start_time = segment.get('start_time', 0)
        end_time = segment.get('end_time', start_time + 5)  # Default duration 5 seconds

        # Creating the subtitle clip
        subtitle_clip = TextClip(
            text, fontsize=60, color='white', font='Impact',
            size=(video.size[0] * 0.8, None),  # Set width to 80% of the video width
            method='caption', align='South', stroke_color='orange2', stroke_width=3.5,
        )

        # Set subtitle clip start and duration times
        subtitle_clip = subtitle_clip.set_start(start_time).set_duration(end_time - start_time)

        # Set position to center and make sure it maintains 10% margin
        subtitle_clip = subtitle_clip.set_position(('right', 'bottom')) #horizontal,vertical

        composite_clips.append(subtitle_clip)
        print("subtitles added")
    
    # Return a composite clip that includes the video and subtitles
    return CompositeVideoClip(composite_clips)

# Creating final video with ffmpeg
def create_final_video(composite_video, audio, output_path):
    temp_video_path = output_path.replace('.mp4', '_temp.mp4')
    composite_video.write_videofile(temp_video_path, codec="libx264", fps=24)
    audio.write_audiofile(output_path.replace('.mp4', '_temp_audio.mp3'))
    ffmpeg_command = [
        'ffmpeg',
        '-i', temp_video_path,
        '-i', output_path.replace('.mp4', '_temp_audio.mp3'),
        '-c:v', 'copy',
        '-c:a', 'aac',
        '-strict', 'experimental',
        output_path
    ]
    subprocess.run(ffmpeg_command, check=True)


temp_audio_dir = os.path.join(dir_name, 'temp_audio')
os.makedirs(temp_audio_dir, exist_ok=True)

image_files = [os.path.join(dir_name, f"image_{i+1}.png") for i in range(len(images_urls))]
image_durations = [(item['end_time'] - item['start_time']) for item in segments]

initial_video = create_video_clip(image_files)
subtitled_video = add_subtitles(initial_video, segments)
audio_clips = generate_audio_clips(segments, temp_audio_dir)
final_video_path = os.path.join(dir_name, f"{title}_final.mp4")
create_final_video(subtitled_video, audio_clips, final_video_path)

print(f"Final video with subtitles and audio saved to {final_video_path}")


Title: Trains: Iron Horses of Industry
Segments: [
    {
        "start_time": 0,
        "end_time": 6,
        "text_1": "For centuries, trains have connected",
        "visual_prompt_1": "DALL-E 2: A vintage steam locomotive pulling a long line of passenger cars through a scenic mountain pass."
    },
    {
        "start_time": 7,
        "end_time": 12,
        "text_2": "people and goods across vast distances,",
        "visual_prompt_2": "DALL-E 2: A modern high-speed bullet train speeding through a futuristic cityscape."
    },
    {
        "start_time": 13,
        "end_time": 18,
        "text_3": "shaping the world as we know it.",
        "visual_prompt_3": "DALL-E 2: A map of the world highlighting major railway networks, showcasing their global reach."
    },
    {
        "start_time": 19,
        "end_time": 24,
        "text_4": "From the early steam engines of the",
        "visual_prompt_4": "DALL-E 2: An illustration of George Stephenson's Rocket, one of the first 

In [4]:
audio_clips = generate_audio_clips(segments, temp_audio_dir)
final_video_path = os.path.join(dir_name, f"{title}_final.mp4")
create_final_video(subtitled_video, audio_clips, final_video_path)

print(f"Final video with subtitles and audio saved to {final_video_path}")

Moviepy - Building video 2024-05-01_23-50-29/Into the Deep: A History of Submarines_final_temp.mp4.
Moviepy - Writing video 2024-05-01_23-50-29/Into the Deep: A History of Submarines_final_temp.mp4



                                                                

Moviepy - Done !
Moviepy - video ready 2024-05-01_23-50-29/Into the Deep: A History of Submarines_final_temp.mp4
MoviePy - Writing audio in 2024-05-01_23-50-29/Into the Deep: A History of Submarines_final_temp_audio.mp3


ffmpeg version 7.0 Copyright (c) 2000-2024 the FFmpeg developers     
  built with Apple clang version 15.0.0 (clang-1500.3.9.4)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.0 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopenvin

MoviePy - Done.


size=    2048KiB time=00:00:31.76 bitrate= 528.2kbits/s speed=62.9x    

Final video with subtitles and audio saved to 2024-05-01_23-50-29/Into the Deep: A History of Submarines_final.mp4


[out#0/mp4 @ 0x13ce44460] video:2267KiB audio:808KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 1.239631%
size=    3114KiB time=00:00:47.91 bitrate= 532.4kbits/s speed=58.2x    
[aac @ 0x13ce47130] Qavg: 1301.893


In [1]:
from director import script
from image_generation import image_generator
from audio_generation import generate_audio_clips
import json
import os
import requests
import subprocess  # For running ffmpeg command
from datetime import datetime
from moviepy.editor import TextClip, CompositeVideoClip, ImageClip, concatenate_videoclips

response = """{
    "title": "Boat Chronicles: A Journey Through Maritime History",
    "segments": [
        {
            "start_time": 0,
            "end_time": 6,
            "text_1": "From Rafts to Ocean Liners: Tracing the Evolution of Maritime Travel",
            "visual_prompt_1": "In the dawn of civilization, humanity crafted rafts from primitive materials, venturing into the unknown waters of rivers and lakes. These humble vessels, fashioned from woven reeds and animal hides, symbolize our earliest aspirations for exploration and discovery."
        },
        {
            "start_time": 7,
            "end_time": 12,
            "text_2": "Explorers and Empires: Navigating the High Seas of Medieval Times",
            "visual_prompt_2":  "Amidst the tumultuous waves of medieval seas, sturdy galleys and longships carried intrepid explorers to distant shores. These wooden behemoths, powered by the strength of oarsmen and the mercy of the wind, braved storms and conquered new horizons."
        },
        {
            "start_time": 13,
            "end_time": 18,
            "text_3": "Trade Winds and Tall Ships: Unveiling the Renaissance Maritime Revolution",
            "visual_prompt_3":  "As the Renaissance dawned, majestic caravels and galleons unfurled their billowing sails, riding the trade winds to exotic lands. These towering vessels, adorned with intricate carvings and colorful flags, heralded a new age of maritime exploration and commerce."
        },
        {
            "start_time": 19,
            "end_time": 24,
            "text_4": "Steam and Steel: Embracing Industrial Maritime Innovation",
            "visual_prompt_4":  "With the advent of the industrial revolution, ironclad steamships plied the world's oceans, transforming travel and trade. These mighty vessels, propelled by the power of steam and fueled by coal, ushered in an era of global connectivity and economic expansion."
        },
        {
            "start_time": 25,
            "end_time": 30,
            "text_5": "Modern Marvels: Experiencing Luxury and Adventure on Contemporary Yachts",
            "visual_prompt_5":  "In the modern era, sleek yachts and cruise liners epitomize luxury and adventure on the high seas. These floating palaces, equipped with state-of-the-art amenities and cutting-edge technology, offer travelers unparalleled comfort and style as they traverse the world's oceans."
        },
        {
            "start_time": 31,
            "end_time": 36,
            "text_6": "Exploring the Unknown: Delving into Oceanographic Expeditions",
            "visual_prompt_6":  "Beneath the waves, research vessels equipped with advanced sonar and submersibles delve into the mysteries of the deep. These scientific marvels, manned by intrepid explorers and equipped with state-of-the-art equipment, unlock the secrets of the ocean, from uncharted depths to thriving ecosystems."
        },
        {
            "start_time": 37,
            "end_time": 42,
            "text_7": "Sustainable Solutions: Embracing Eco-Friendly Practices in Boating",
            "visual_prompt_7":  "Amid growing environmental concerns, eco-friendly boats harness renewable energy and sustainable materials to navigate the world's waterways. From solar-powered catamarans to wind-propelled schooners, these green vessels pave the way towards a cleaner, greener future for maritime travel."
        },
        {
            "start_time": 43,
            "end_time": 48,
            "text_8": "Future Frontiers: Envisioning Innovations in Autonomous Shipping",
            "visual_prompt_8":  "In the era of automation, autonomous ships equipped with AI and sensors navigate the seas with unparalleled precision and efficiency. These unmanned vessels, guided by artificial intelligence and powered by renewable energy, herald a new chapter in maritime innovation and sustainability."
        }
    ]
}
"""
content = json.dumps(response)
#print (content)

response_json = json.loads(response)# Store the title in a variable
title = response_json['title']
# Store the segments as a list
segments = response_json['segments']

# Output the stored data
print("Title:", title)
print("Segments:", json.dumps(segments, indent=4))

visual_prompts = [segment[f'visual_prompt_{i+1}'] for i, segment in enumerate(segments)]

# Generating images
images_urls = image_generator(visual_prompts)

print(images_urls)

# Creating a directory with the current timestamp
dir_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
print(dir_name)
os.makedirs(dir_name, exist_ok=True)

#download urls
for idx, url in enumerate(images_urls):
    response = requests.get(url)
    file_path = os.path.join(dir_name, f"image_{idx+1}.png")
    if response.status_code == 200:
        with open(file_path, 'wb') as file:
            file.write(response.content)
        print(f"Downloaded and saved: {file_path}")
    else:
        print(f"Failed to download image from {url}")

# Creating video clips from images
def create_video_clip(image_paths, fixed_duration=6):
    clips = [ImageClip(image).set_duration(fixed_duration) for image in image_paths]
    return concatenate_videoclips(clips, method="compose")


def add_subtitles(video, segments):
    composite_clips = [video]  # Start with the main video clip
    for idx, segment in enumerate(segments):
        text = segment.get(f'text_{idx+1}', '')  # Extract text
        start_time = segment.get('start_time', 0)
        end_time = segment.get('end_time', start_time + 5)  # Default duration 5 seconds

        # Creating the subtitle clip
        subtitle_clip = TextClip(
            text, fontsize=60, color='white', font='Impact',
            size=(video.size[0] * 0.8, None),  # Set width to 80% of the video width
            method='caption', align='South', stroke_color='orange2', stroke_width=3.5,
        )

        # Set subtitle clip start and duration times
        subtitle_clip = subtitle_clip.set_start(start_time).set_duration(end_time - start_time)

        # Set position to center and make sure it maintains 10% margin
        subtitle_clip = subtitle_clip.set_position(('right', 'bottom')) #horizontal,vertical

        composite_clips.append(subtitle_clip)
        print("subtitles added")
    
    # Return a composite clip that includes the video and subtitles
    return CompositeVideoClip(composite_clips)

# Creating final video with ffmpeg
def create_final_video(composite_video, audio, output_path):
    temp_video_path = output_path.replace('.mp4', '_temp.mp4')
    composite_video.write_videofile(temp_video_path, codec="libx264", fps=24)
    audio.write_audiofile(output_path.replace('.mp4', '_temp_audio.mp3'))
    ffmpeg_command = [
        'ffmpeg',
        '-i', temp_video_path,
        '-i', output_path.replace('.mp4', '_temp_audio.mp3'),
        '-c:v', 'copy',
        '-c:a', 'aac',
        '-strict', 'experimental',
        output_path
    ]
    subprocess.run(ffmpeg_command, check=True)


temp_audio_dir = os.path.join(dir_name, 'temp_audio')
os.makedirs(temp_audio_dir, exist_ok=True)

image_files = [os.path.join(dir_name, f"image_{i+1}.png") for i in range(len(images_urls))]
image_durations = [(item['end_time'] - item['start_time']) for item in segments]

initial_video = create_video_clip(image_files)
subtitled_video = add_subtitles(initial_video, segments)
audio_clips = generate_audio_clips(segments, temp_audio_dir)
final_video_path = os.path.join(dir_name, f"{title}_final.mp4")
create_final_video(subtitled_video, audio_clips, final_video_path)

print(f"Final video with subtitles and audio saved to {final_video_path}")


Title: Boat Chronicles: A Journey Through Maritime History
Segments: [
    {
        "start_time": 0,
        "end_time": 6,
        "text_1": "From Rafts to Ocean Liners: Tracing the Evolution of Maritime Travel",
        "visual_prompt_1": "In the dawn of civilization, humanity crafted rafts from primitive materials, venturing into the unknown waters of rivers and lakes. These humble vessels, fashioned from woven reeds and animal hides, symbolize our earliest aspirations for exploration and discovery."
    },
    {
        "start_time": 7,
        "end_time": 12,
        "text_2": "Explorers and Empires: Navigating the High Seas of Medieval Times",
        "visual_prompt_2": "Amidst the tumultuous waves of medieval seas, sturdy galleys and longships carried intrepid explorers to distant shores. These wooden behemoths, powered by the strength of oarsmen and the mercy of the wind, braved storms and conquered new horizons."
    },
    {
        "start_time": 13,
        "end_time": 18,


ChunkedEncodingError: ('Connection broken: IncompleteRead(1211847 bytes read, 1936134 more expected)', IncompleteRead(1211847 bytes read, 1936134 more expected))

In [3]:
from moviepy.editor import ImageSequenceClip, AudioFileClip
import os

def create_video_with_audio(image_folder, output_file_name):
    # Find the only MP3 file in the specified folder
    audio_files = [f for f in os.listdir(image_folder) if f.endswith('.mp3')]
    
    # Ensure that there's exactly one MP3 file
    if len(audio_files) != 1:
        raise ValueError("There must be exactly one MP3 file in the specified folder.")
    
    audio_file_name = audio_files[0]
    
    # List images in the specified folder, sorted in ascending order
    images = [os.path.join(image_folder, f) for f in sorted(os.listdir(image_folder)) if f.endswith('.png')]
    
    # Create a video clip from images, each image displayed for 6 seconds
    clip = ImageSequenceClip(images, fps=1/6)
    
    # Load the audio file
    audio = AudioFileClip(os.path.join(image_folder, audio_file_name))
    
    # Set the audio of the clip as the loaded audio file
    # Note: If audio duration is less than video, it will loop by default
    video = clip.set_audio(audio)
    
    # If the audio is longer than the video, you might want to cut it to match the video's duration
    if audio.duration > clip.duration:
        video = video.set_duration(clip.duration)
    
    # Write the result to a file (codec and bitrate can be adjusted according to your needs)
    video.write_videofile(os.path.join(image_folder, output_file_name), codec='libx264', bitrate='8000k')

# Usage example:
folder_path = '/path/to/your/images/and/audio'  # Update this path
output_name = 'output_video.mp4'                # Desired output file name

create_video_with_audio(folder_path, output_name)

UnboundLocalError: cannot access local variable 'video' where it is not associated with a value

In [7]:
from moviepy.editor import ImageSequenceClip
import os

def create_video_from_images(image_folder, output_file_name):
    # List all images in the specified folder, sorted in ascending order
    images = [os.path.join(image_folder, f) for f in sorted(os.listdir(image_folder)) if f.endswith('.png')]
    
    # Create a video clip from images, each image displayed for 6 seconds
    clip = ImageSequenceClip(images, fps=1/6)
    
    # Write the result to a file (codec and bitrate can be adjusted according to your needs)
    clip.write_videofile(os.path.join(image_folder, output_file_name), codec='libx264', bitrate='8000k')

# Usage example:
folder_path = r'/Users/p/Desktop/untitled folder 2/multiscript/2024-05-08_12-22-33'  # Update this path
output_name = 'output_video.mp4'      # Desired output file name

create_video_from_images(folder_path, output_name)

Moviepy - Building video /Users/p/Desktop/untitled folder 2/multiscript/2024-05-08_12-22-33/output_video.mp4.
Moviepy - Writing video /Users/p/Desktop/untitled folder 2/multiscript/2024-05-08_12-22-33/output_video.mp4



                                                           

Moviepy - Done !
Moviepy - video ready /Users/p/Desktop/untitled folder 2/multiscript/2024-05-08_12-22-33/output_video.mp4
