<a href="https://colab.research.google.com/github/JaganK2Commit/Copilot/blob/main/BedTimeStory_V3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### What should the video be about?


In [1]:
# Narrator persona; substituted for {narrator_adjectives} in the ending-quote prompt.
NARRATOR_ADJECTIVES = "story teller"
# Text prompt passed to the background-music prediction as its "prompt" input.
MUSIC_STYLE = "kids friendly random music"

## Tools and Models used in this notebook

Tools and models used in this script

| Task                | Service         | Model              |
|---------------------|-----------------|--------------------|
| Script generation   | OpenAI API      | gpt-3.5-turbo      |
| Text to Image       | Replicate AI API| ai-forever/kandinsky-2 |
| Text to Audio       | Replicate AI API| suno-ai/bark |


### Install the dependencies

In [2]:
!pip install replicate
!pip install requests
!pip install openai
!pip install langchain
!pip install moviepy
!pip install ffmpeg --upgrade
!pip install pydub
!pip install resemble
from google.colab import output
output.clear()

In [3]:
import os
import openai
import time
import numpy as np
import replicate
import json

In [4]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SimpleSequentialChain, SequentialChain, TransformChain
from langchain.llms import OpenAI, Replicate
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from pydub import AudioSegment

## Acquire tokens for Replicate, OpenAI, and Resemble

In [5]:
# get your token from https://replicate.com/account
from getpass import getpass

# Label the prompt: this notebook asks for three different secrets in a row and
# an unlabeled prompt does not say which one is expected.
REPLICATE_API_TOKEN = getpass("Replicate API token: ")
# Exported so the replicate client used in later cells can authenticate.
os.environ["REPLICATE_API_TOKEN"] = REPLICATE_API_TOKEN

··········


In [6]:
# get your key from https://platform.openai.com/account/api-keys
# Label the prompt so it is clear which secret is being requested.
OPENAI_API_KEY = getpass("OpenAI API key: ")
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
# The openai module is configured explicitly in addition to the environment variable.
openai.api_key = os.getenv("OPENAI_API_KEY")

··········


In [7]:
# get your key from https://app.resemble.ai/account/api
from resemble import Resemble

# Label the prompt so it is clear which secret is being requested.
RESEMBLE_API_KEY = getpass("Resemble API key: ")
# The environment variable name was previously misspelled ("RESEMEBLE_API_KEY");
# it is only read back on the next line, so the corrected spelling is used for both.
os.environ["RESEMBLE_API_KEY"] = RESEMBLE_API_KEY
Resemble.api_key(os.environ["RESEMBLE_API_KEY"])

··········


## Create the LangChain chains

In [76]:
# LLMChain that writes the children's story as a JSON document
# (keys: title, description, paragraphs, visual_descriptions).
topic_template = """
Generate a script for a short moral story for children of 5 to 7 years old, which should be in not more than 15 paragraphs.
Write the story in JSON format with the following keys:

"title": The name of the story
"description": Description that can be used for YouTube video description, not exceeding 20 words
"paragraphs": A list of 15 paragraphs that make up the story
"visual_descriptions": A visual description that correspond to each paragraph. Each visual description is an array object with two keys character_descriptions, scene_description. Each character_descriptions has two keys name and a description of the characters.

Please make sure that each visual description includes both the character introductions and the scene descriptions to allow AI image tools to generate appropriate pictures for each scene.

Thank you!
"""
system_message_prompt = SystemMessage(content="You are a story teller, shares wide range of fairy tales, bedtime stories, kids stories, and be creative in your responses")
human_message_prompt = HumanMessagePromptTemplate(prompt=PromptTemplate(
                                                  template=topic_template,
                                                  input_variables=[]
                                                  ))

# create the initial script
chat = ChatOpenAI(temperature=0.99, model_name="gpt-3.5-turbo")
# The system message must be part of the prompt to reach the model. It was
# previously constructed but left out of the message list; the ending-quote
# chain further down builds its prompt as [system, human] in the same way.
chat_prompt_template = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
script_chain = LLMChain(llm=chat, prompt=chat_prompt_template, output_key='script')

# The template declares no input variables, so these values are not
# interpolated into the prompt; the call is kept unchanged for compatibility.
script = script_chain.run({"narrator_adjectives": NARRATOR_ADJECTIVES, "music_style": MUSIC_STYLE})

# `script` is the raw model reply (a JSON string); it is parsed in the
# "Script Scrubbing" section.
print(script)
print(chat_prompt_template)

{
  "title": "The Caring Squirrel",
  "description": "A heartwarming story about a squirrel who learns the importance of kindness and helping others.",
  "paragraphs": [
    "Once upon a time, in a lush forest, there lived a curious little squirrel named Sandy.",
    "Sandy loved collecting shiny acorns and exploring the tall trees, but was always alone.",
    "One day, as Sandy was gathering acorns, a tiny bird named Bella fell from her nest.",
    "Sandy rushed over and helped Bella back into her cozy nest at the top of the tree.",
    "Bella was grateful and asked if she could join Sandy on his acorn collecting adventures.",
    "Excited to have a friend, Sandy eagerly agreed and they became inseparable.",
    "One gloomy day, they stumbled upon a scared rabbit named Fluffy, who had lost his way.",
    "Sandy and Bella comforted Fluffy and guided him back to his cozy burrow.",
    "Fluffy thanked them and soon joined Sandy and Bella on their adventures.",
    "As they explored toget

In [27]:
script = """
{
  "title": "The Adventure of Little Squirrel",
  "description": "Join Little Squirrel on his exciting journey to find the Golden Acorn.",
  "paragraphs": [
    "Once upon a time, in a beautiful forest, lived a small, energetic squirrel named Little Squirrel. He was known for his shiny, brown fur and his love for acorns.",
    "One day, Little Squirrel heard a legend about a Golden Acorn that was hidden somewhere in the forest. The Golden Acorn was said to grant one wish to the one who found it.",
    "Excited by the legend, Little Squirrel decided to embark on a journey to find the Golden Acorn. He packed his tiny backpack with nuts and berries and set off early the next morning.",
    "His first stop was the Old Oak Tree. He climbed to the top, but the Golden Acorn was not there. He did, however, find a clue - a small, golden leaf.",
    "Following the clue, Little Squirrel arrived at the Sparkling Stream. He searched high and low, but the Golden Acorn was not there either. Instead, he found another clue - a golden pebble.",
    "The golden pebble led him to the Whispering Windmill. He climbed to the top and looked around. And there, hidden in a corner, was the Golden Acorn!",
    "Little Squirrel was overjoyed. He held the Golden Acorn high above his head and made a wish. He wished for the forest to always be full of food for all its inhabitants.",
    "As soon as he made his wish, the Golden Acorn started to glow. It shot up into the sky and exploded into a shower of golden sparks. When the sparks touched the ground, they turned into all kinds of fruits, nuts, and berries.",
    "Little Squirrel returned home, his heart full of joy. He had not only found the Golden Acorn but also ensured that his friends in the forest would never go hungry.",
    "From that day forward, the forest was always full of food. And Little Squirrel became a hero, loved by all for his bravery and kindness."
  ],
  "visual_descriptions": [
    {
      "character_descriptions": [
        {
          "name": "Little Squirrel",
          "description": "A small, energetic squirrel with shiny, brown fur."
        }
      ],
      "scene_description": "A lush, beautiful forest with tall trees and colorful flowers."
    },
    {
      "character_descriptions": [],
      "scene_description": "Little Squirrel sitting in a circle of forest animals, listening to an old, wise owl."
    },
    {
      "character_descriptions": [],
      "scene_description": "Little Squirrel packing his tiny backpack with nuts and berries in his cozy tree hole home."
    },
    {
      "character_descriptions": [],
      "scene_description": "Little Squirrel at the top of the Old Oak Tree, holding a small, golden leaf."
    },
    {
      "character_descriptions": [],
      "scene_description": "Little Squirrel searching around the Sparkling Stream, holding a golden pebble."
    },
    {
      "character_descriptions": [],
      "scene_description": "Little Squirrel at the top of the Whispering Windmill, holding the Golden Acorn high above his head."
    },
    {
      "character_descriptions": [],
      "scene_description": "Little Squirrel making a wish, with the Golden Acorn glowing in his hands."
    },
    {
      "character_descriptions": [],
      "scene_description": "The Golden Acorn shooting up into the sky and exploding into a shower of golden sparks."
    },
    {
      "character_descriptions": [],
      "scene_description": "Little Squirrel returning home, with a trail of fruits, nuts, and berries behind him."
    },
    {
      "character_descriptions": [],
      "scene_description": "The forest full of food, with Little Squirrel being celebrated as a hero by the other forest animals."
    }
  ]
}
"""

In [77]:
# LLMChain to write a title for our video
# The generated text is exposed under the 'title' output key of the chain.
llm = OpenAI(temperature=.9)
# NOTE(review): the prompt still says "how-to video script" although the script
# produced above is a children's story — confirm whether the wording should change.
template = """Please come up with a creative and zany title for the below how-to video script.
Puns are encouraged. Don't include quotations (") in the output.

Script:
{script}
Title: """
prompt_template = PromptTemplate(input_variables=["script"], template=template)
title_chain = LLMChain(llm=llm, prompt=prompt_template, output_key='title')

# Stand-alone smoke test of this chain (disabled):
# title = title_chain.run({"script": script})
# print(title)



## Script Scrubbing

In [78]:
## Fill the characters_descriptions with the last known description for each paragraph

def fill_character_descriptions(input_data):
    """Give every character mention the last known description of that character.

    The story JSON lists characters per scene, but a scene may name a character
    without describing it again. This function collects, per character name, the
    last non-empty description found anywhere in ``visual_descriptions`` and
    writes it into every scene entry that names that character.

    Args:
        input_data: Parsed story dict. ``input_data["visual_descriptions"]`` is
            a list of scene dicts, each optionally holding a
            ``"character_descriptions"`` list of ``{"name", "description"}``
            dicts. A missing or empty ``"description"`` is allowed.

    Returns:
        The same ``input_data`` object, modified in place. Every scene gets a
        ``"character_descriptions"`` list (empty when the scene named no
        characters). Entries without a name are dropped; a character that is
        never described anywhere gets an empty description.
    """
    scenes = input_data.get("visual_descriptions", [])

    # Pass 1: remember the most recent non-empty description for each name.
    # Empty/missing descriptions are skipped so they cannot erase a known one.
    last_known = {}
    for scene in scenes:
        for character in scene.get("character_descriptions") or []:
            name = character.get("name")
            text = character.get("description")
            if name and text:
                last_known[name] = text

    # Pass 2: rewrite each scene's character list using the collected values.
    for scene in scenes:
        scene["character_descriptions"] = [
            {
                "name": character["name"],
                "description": last_known.get(character["name"], ""),
            }
            for character in scene.get("character_descriptions") or []
            if character.get("name")
        ]

    return input_data


# Fill the character_descriptions with the last defined value.
# `script` starts out as a JSON string; after this cell it is a dict. Parsing
# only when it is still a string keeps the cell safe to re-run (json.loads on
# an already-parsed dict raises TypeError).
if isinstance(script, str):
    script = json.loads(script)
script = fill_character_descriptions(script)

In [79]:
# LLMChain to create the replicate predictions for our text-to-image model
import re
def update_scene_descriptions(input_data):
    """Inline character descriptions into a scene description.

    Args:
        input_data: One scene dict with a ``"scene_description"`` string and an
            optional ``"character_descriptions"`` list of
            ``{"name", "description"}`` dicts.

    Returns:
        A list of strings. With no characters it holds the unchanged scene
        description. Otherwise it holds one entry per character, in order: the
        scene description with every (case-insensitive) occurrence of that
        character's name replaced by ``"<name>,<description>"``, or the
        unchanged scene description when the name does not occur. Each entry
        is derived from the original scene text, not from the previous entry.
    """
    character_descriptions = input_data.get("character_descriptions") or []
    scene_description = input_data["scene_description"]

    # If no character descriptions are available, return the original scene.
    if not character_descriptions:
        return [scene_description]

    updated_scene_descriptions = []
    for character in character_descriptions:
        name = character["name"]
        annotated = name + "," + character["description"]

        # One pattern drives both the "is the character mentioned?" test and
        # the substitution, so both agree on case-insensitivity. An empty name
        # would match everywhere, so it is treated as "not mentioned".
        pattern = re.compile(re.escape(name), flags=re.IGNORECASE)
        if name and pattern.search(scene_description):
            # A function replacement inserts the text literally; a plain string
            # would be parsed as a template and fail on backslashes.
            updated_scene_descriptions.append(
                pattern.sub(lambda _match: annotated, scene_description)
            )
        else:
            updated_scene_descriptions.append(scene_description)

    return updated_scene_descriptions


def transform_func(inputs: dict) -> dict:
  """Queue one Kandinsky text-to-image prediction per visual description.

  Reads ``inputs['script']['visual_descriptions']``, turns each entry into a
  prompt via ``update_scene_descriptions`` (only the first returned string is
  used) and creates a Replicate prediction for it without waiting for the result.

  Returns:
      ``{'video_predictions': [...]}`` with the created prediction objects in
      scene order.
  """
  kandinsky = replicate.models.get('ai-forever/kandinsky-2')
  kandinsky_version = kandinsky.versions.get("601eea49d49003e6ea75a11527209c4f510a93e2112c969d548fbb45b9c4f19f")

  queued = []
  for scene in inputs['script']['visual_descriptions']:
      print(scene)
      prompt = update_scene_descriptions(scene)[0]
      print(f"Creating video prediction for '{prompt}'...")
      model_input = {
          "prompt": prompt,
          "prior_steps": '5',
          "guidance_scale": 4,
          "num_inference_steps": 100,
          "prior_cf_scale": 4,
          "scheduler": "p_sampler",
          "width": 1024,
          "height": 768,
      }
      queued.append(replicate.predictions.create(version=kandinsky_version, input=model_input))
  return {'video_predictions': queued}

# Chain step: consumes 'script', produces 'video_predictions'.
video_predictions_chain = TransformChain(
    input_variables=['script'],
    output_variables=['video_predictions'],
    transform=transform_func,
)

# video_predictions = video_predictions_chain.run({"script": script})
# print(video_predictions)

In [82]:

page = 1
page_size = 100

# Lookup helpers for finding the voice / project / clip identifiers (disabled):
# response = Resemble.v2.voices.all(page, page_size)
# voice = list(filter(lambda x: x['name'] == 'Jagan', response['items']))[0]

# response = Resemble.v2.projects.all(page, page_size)
# projects = response['items']

# response = Resemble.v2.clips.all(project_uuid, page, page_size)
# clips = response['items']

# Identifiers shared by every clip; defined once, outside the loop, so they
# exist even when there is nothing to narrate (the polling cell reads project_uuid).
project_uuid = '7468a441'
voice_uuid = 'f0426afb'
callback_uri = 'https://example.com/callback/resemble-clip'

# `script` is a JSON string until the "Script Scrubbing" cell parses it into a
# dict; accept both so this cell works regardless of which cells ran before it.
story = script if isinstance(script, dict) else json.loads(script)

# create a new clip
# NOTE(review): the [1:2] slice narrates only the second paragraph — presumably
# a cost-saving test setting; widen the slice to narrate the whole story.
audio_descriptions = story['paragraphs'][1:2]
print(audio_descriptions)

audio_clips = []
for i, description in enumerate(audio_descriptions):
    print(f"Creating audio prediction for {description}")
    body = description
    print(body)
    # Asynchronous creation: the response is stored and polled in the next cell.
    response = Resemble.v2.clips.create_async(
        project_uuid,
        voice_uuid,
        callback_uri,
        body,
        title=f"My clip {i}",
        sample_rate=None,
        output_format=None,
        precision=None,
        include_timestamps=None,
        is_public=False,
        is_archived=False
    )

    audio_clips.append(response)


['One day, Little Squirrel heard a legend about a Golden Acorn that was hidden somewhere in the forest. The Golden Acorn was said to grant one wish to the one who found it.']
Creating audio prediction for One day, Little Squirrel heard a legend about a Golden Acorn that was hidden somewhere in the forest. The Golden Acorn was said to grant one wish to the one who found it.
One day, Little Squirrel heard a legend about a Golden Acorn that was hidden somewhere in the forest. The Golden Acorn was said to grant one wish to the one who found it.


In [84]:
# Poll Resemble until every clip created above exposes its rendered audio URL.
POLL_INTERVAL_SECONDS = 2
MAX_POLLS = 150  # give up after about five minutes instead of spinning forever

done = False
audio_urls = []
for _attempt in range(MAX_POLLS):
  # Rebuild the list on every pass so a clip is never recorded twice.
  audio_urls = []
  for clip in audio_clips:
    response = Resemble.v2.clips.get(project_uuid, clip['item']['uuid'])
    item = response.get('item') or {}
    # A clip is ready only when the call succeeded AND the audio URL is set.
    # The check runs once per clip, inside the loop, so every clip is tested.
    if response.get('success') and item.get('audio_src'):
      audio_urls.append(item['audio_src'])

  done = len(audio_urls) == len(audio_clips)
  if done:
    break
  time.sleep(POLL_INTERVAL_SECONDS)
  output.clear()
else:
  raise TimeoutError(
      f"Only {len(audio_urls)} of {len(audio_clips)} Resemble clips were ready "
      f"after {MAX_POLLS} polls"
  )

output.clear()
print("All audio clips are processed")

audio_urls

All audio clips are processed


['https://app.resemble.ai/rails/active_storage/blobs/redirect/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaHBCSk5yR1EwPSIsImV4cCI6bnVsbCwicHVyIjoiYmxvYl9pZCJ9fQ==--6581f12e1133f6d5e8073c04d63d14691a5127cc/My+clip+0-14f3e00c.wav']

In [81]:
# LLMChain to create the replicate predictions for our bark model
def transform_func(inputs: dict) -> dict:
  """Queue one Bark text-to-speech prediction per story paragraph.

  Reads ``inputs['script']['paragraphs']`` and creates a Replicate prediction
  for each paragraph without waiting for it to finish.

  Returns:
      ``{'audio_predictions': [...]}`` in paragraph order.
  """
  bark = replicate.models.get("suno-ai/bark")
  bark_version = bark.versions.get("b76242b40d67c76ab6742e987628a2a9ac019e11d56ab96c4e91ce03b79b2787")

  def queue_narration(paragraph):
      # Log first, then create the prediction for this paragraph.
      print(f"Creating audio prediction for '{paragraph}''...")
      return replicate.predictions.create(
          version=bark_version,
          input={
              "prompt": paragraph,
              "history_prompt": "announcer",
              "text_temp": 0.7,
              "waveform_temp": 0.8,
          },
      )

  paragraphs = inputs['script']['paragraphs']
  return {'audio_predictions': [queue_narration(paragraph) for paragraph in paragraphs]}

# Chain step: consumes 'script', produces 'audio_predictions'.
audio_predictions_chain = TransformChain(
    input_variables=['script'],
    output_variables=['audio_predictions'],
    transform=transform_func,
)

In [82]:
# LLMChain to create the replicate predictions for our bark model
def transform_func(inputs: dict) -> dict:
  """Create Bark narration predictions, one per paragraph of the script.

  The paragraphs come from ``inputs['script']['paragraphs']``; each prediction
  is only created here, not awaited.

  Returns:
      A dict with the single key ``'audio_predictions'`` mapping to the list of
      created predictions, in paragraph order.
  """
  narrator_version = replicate.models.get("suno-ai/bark").versions.get(
      "b76242b40d67c76ab6742e987628a2a9ac019e11d56ab96c4e91ce03b79b2787"
  )
  story_paragraphs = inputs['script']['paragraphs']

  created = []
  for paragraph in story_paragraphs:
      print(f"Creating audio prediction for '{paragraph}''...")
      bark_input = {"prompt": paragraph, "history_prompt": "announcer", "text_temp": 0.7, "waveform_temp": 0.8}
      created.append(replicate.predictions.create(version=narrator_version, input=bark_input))
  return {'audio_predictions': created}

# Chain step: consumes 'script', produces 'audio_predictions'.
audio_predictions_chain = TransformChain(
    input_variables=['script'],
    output_variables=['audio_predictions'],
    transform=transform_func,
)

# audio_predictions = audio_predictions_chain.run({"script": script})
# print(audio_predictions)

In [83]:
# LLMChain to create the cover image
# NOTE(review): `llm` is reassigned here, but title_image_chain below is built
# with `text2image`, not `llm` — the assignment looks unused in this cell.
llm = OpenAI(temperature=.9)
# NOTE(review): the line with {script} ends in two double quotes; the extra
# quote is sent as part of the prompt — confirm whether that is intended.
template = """
          Create a visual description artstation, hd, dramatic lighting, detailed for the following script.
          "{script}""
          """
prompt_template = PromptTemplate(input_variables=["script"], template=template)

# The image model is wrapped as a LangChain LLM, so the formatted template text
# is what gets passed to it; the chain output is stored under 'title_image'.
text2image = Replicate(model="ai-forever/kandinsky-2:601eea49d49003e6ea75a11527209c4f510a93e2112c969d548fbb45b9c4f19f",
                       input={"prior_steps": '5', "guidance_scale": 4, "num_inference_steps": 100, "prior_cf_scale":4,
                                                        "scheduler": "p_sampler"})

# Alternative image model (disabled):
# text2image = Replicate(model="stability-ai/stable-diffusion:db21e45d3f7023abc2a46ee38a23973f6dce16bb082a930b0c49861f96d1e5bf",
#                        input={'image_dimensions': '512x512', "negative_prompt": "text, writing"})
title_image_chain = LLMChain(llm=text2image, prompt=prompt_template, output_key='title_image')

# Stand-alone smoke test of this chain (disabled):
# title_image = title_image_chain.run({"script": script})
# print(title_image)

In [84]:
# LLMChain to write the thank you note at the end of our video
# Inputs: 'script' and 'narrator_adjectives'; output key: 'ending_quote'.
template = """Please come up with a creative and zany ending quote from our narrator.
The script is what the narrator just read. We want to close things out in less than 15 words.

Make sure you add a "And don't forget to like and subscribe!" to the end of your output.

You are a {narrator_adjectives} narrator.

Script:
{script}
Ending quote:
"""

# These three names are also assigned by the script-generation cell; this cell
# rebinds them for the ending-quote prompt.
system_message_prompt = SystemMessage(content="You are a helpful assistant.")
human_message_prompt = HumanMessagePromptTemplate(prompt=PromptTemplate(
                                                  template=template,
                                                  input_variables=["script", "narrator_adjectives"]))
chat_prompt_template = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
# Reuses the `chat` model object created in the script-generation cell.
ending_quote_chain = LLMChain(llm=chat, prompt=chat_prompt_template, output_key='ending_quote')

# Stand-alone smoke test of this chain (disabled):
# ending_quote = ending_quote_chain.run({"script": script, "narrator_adjectives": NARRATOR_ADJECTIVES})
# print(ending_quote)

In [85]:
# LLMChain to create the prediction that generates the audio for the thank you note
def transform_func(inputs: dict) -> dict:
  """Queue the Bark prediction that narrates the ending quote.

  Uses ``inputs['ending_quote']`` as the text to speak and returns
  ``{'ending_quote_prediction': <prediction>}`` without waiting for completion.
  """
  bark = replicate.models.get("suno-ai/bark")
  bark_version = bark.versions.get("b76242b40d67c76ab6742e987628a2a9ac019e11d56ab96c4e91ce03b79b2787")
  # Alternative voice noted by the author: en_speaker_6 (female, loud).
  bark_input = {
      "prompt": inputs['ending_quote'],
      "history_prompt": "announcer",
      "text_temp": 0.7,
      "waveform_temp": 0.8,
  }
  return {'ending_quote_prediction': replicate.predictions.create(version=bark_version, input=bark_input)}

# Chain step: consumes 'ending_quote', produces 'ending_quote_prediction'.
ending_quote_prediction_chain = TransformChain(
    input_variables=['ending_quote'],
    output_variables=['ending_quote_prediction'],
    transform=transform_func,
)

# ending_quote_prediction = ending_quote_prediction_chain.run({"ending_quote": ending_quote})
# print(ending_quote_prediction)

#chain_output['ending_quote_prediction'].output['audio_out']


In [86]:
# LLMChain to create the prediction that generates the audio for the title
def transform_func(inputs: dict) -> dict:
  """Queue the Bark prediction that narrates the video title.

  Uses ``inputs['title']`` as the text to speak and returns
  ``{'title_audio_prediction': <prediction>}`` without waiting for completion.
  """
  bark_version = replicate.models.get("suno-ai/bark").versions.get(
      "b76242b40d67c76ab6742e987628a2a9ac019e11d56ab96c4e91ce03b79b2787"
  )
  spoken_title = replicate.predictions.create(
      version=bark_version,
      input={
          "prompt": inputs['title'],
          "history_prompt": "announcer",
          "text_temp": 0.7,
          "waveform_temp": 0.8,
      },
  )
  return {'title_audio_prediction': spoken_title}

# Chain step: consumes 'title', produces 'title_audio_prediction'.
title_audio_prediction_chain = TransformChain(
    input_variables=['title'],
    output_variables=['title_audio_prediction'],
    transform=transform_func,
)

# title_audio_prediction = title_audio_prediction_chain.run({"title": title})
# print(title_audio_prediction)


In [87]:
# LLMChain to create the prediction that generates the background music
def transform_func(inputs: dict) -> dict:
  """Queue the riffusion prediction that generates the background music.

  Uses ``inputs['music_style']`` as the prompt and returns
  ``{'music_prediction': <prediction>}`` without waiting for completion.
  """
  riffusion = replicate.models.get("riffusion/riffusion")
  riffusion_version = riffusion.versions.get("8cf61ea6c56afd61d8f5b9ffd14d7c216c0a93844ce2d82ac1c9ecc9c7f24e05")
  queued = replicate.predictions.create(
      version=riffusion_version,
      input={"prompt": inputs['music_style']},
  )
  return {'music_prediction': queued}

# Chain step: consumes 'music_style', produces 'music_prediction'.
music_prediction_chain = TransformChain(
    input_variables=['music_style'],
    output_variables=['music_prediction'],
    transform=transform_func,
)

# Runs the chain once on its own and shows the freshly created prediction.
music_prediction = music_prediction_chain.run({"music_style": MUSIC_STYLE})
print(music_prediction)

id='mhgui7bb3bskjs3gdxmo4wysti' error=None input={'prompt': 'kids friendly random music'} logs='' output=None status='starting' version=Version(id='8cf61ea6c56afd61d8f5b9ffd14d7c216c0a93844ce2d82ac1c9ecc9c7f24e05', created_at=datetime.datetime(2022, 12, 16, 7, 48, 40, 890140, tzinfo=datetime.timezone.utc), cog_version='0.6.1', openapi_schema={'info': {'title': 'Cog', 'version': '0.1.0'}, 'paths': {'/': {'get': {'summary': 'Root', 'responses': {'200': {'content': {'application/json': {'schema': {}}}, 'description': 'Successful Response'}}, 'operationId': 'root__get'}}, '/predictions': {'post': {'summary': 'Predict', 'responses': {'200': {'content': {'application/json': {'schema': {'$ref': '#/components/schemas/PredictionResponse'}}}, 'description': 'Successful Response'}, '422': {'content': {'application/json': {'schema': {'$ref': '#/components/schemas/HTTPValidationError'}}}, 'description': 'Validation Error'}}, 'description': 'Run a single prediction on the model', 'operationId': 'pre

## Run the chain

Now, let's execute the chain we created. This is relatively fast, because the chains that create long-running predictions (like the video_predictions_chain) make asynchronous calls to the Replicate API.

In [88]:
# Run the chain
# The sub-chains run in the listed order. script_chain is commented out, so
# 'script' is supplied as an input variable instead of being generated here.
overall_chain = SequentialChain(chains=[
    #script_chain,
                                        title_chain,
                                        video_predictions_chain,
                                        audio_predictions_chain,
                                        ending_quote_chain,
                                        ending_quote_prediction_chain,
                                        title_image_chain,
                                        title_audio_prediction_chain,
                                        music_prediction_chain
                                        ], input_variables=['script', 'narrator_adjectives', 'music_style'], output_variables=[
                                            #'script',
                                            'title', 'video_predictions', 'audio_predictions', 'ending_quote', 'title_image', 'ending_quote_prediction', 'title_audio_prediction', 'music_prediction'], verbose=True)
# chain_output is indexed by the output keys above in the cells that follow
# (and by 'script' in the unpacking cell).
chain_output = overall_chain({"script":script, "narrator_adjectives": NARRATOR_ADJECTIVES, "music_style": MUSIC_STYLE})



[1m> Entering new  chain...[0m
{'character_descriptions': [{'name': 'Sandy', 'description': 'A curious little squirrel with a bushy tail.'}], 'scene_description': 'A lush forest with towering trees and colorful foliage.'}
Creating video prediction for 'A lush forest with towering trees and colorful foliage.'...
{'character_descriptions': [{'name': 'Sandy', 'description': 'A curious little squirrel with a bushy tail.'}], 'scene_description': 'Sandy collecting shiny acorns while exploring the tall trees.'}
Creating video prediction for 'Sandy,A curious little squirrel with a bushy tail. collecting shiny acorns while exploring the tall trees.'...
{'character_descriptions': [{'name': 'Sandy', 'description': 'A curious little squirrel with a bushy tail.'}, {'name': 'Bella', 'description': 'A tiny bird with colorful feathers.'}], 'scene_description': 'Sandy helping Bella back into her cozy nest at the top of the tree.'}
Creating video prediction for 'Sandy,A curious little squirrel with 

In [89]:
# unpack outputs
script = chain_output['script']
# The title used here comes from the story JSON; the title generated by
# title_chain is available separately as chain_output['title'].
title = script['title']
split_script = script['paragraphs']
video_descriptions = script['visual_descriptions']
video_predictions = chain_output['video_predictions']
audio_predictions = chain_output['audio_predictions']

# Paragraph count vs. visual-description count; the next cell asserts they match.
# print(title)
print(len(split_script))
print(len(video_descriptions))
# print(video_predictions)
# print(audio_predictions)

16
15


In [90]:
# sanity check: every paragraph needs exactly one visual description.
# The message reports both counts so a failure is diagnosable from the output.
assert len(split_script) == len(video_descriptions), (
    f"{len(split_script)} paragraphs but {len(video_descriptions)} visual descriptions"
)

AssertionError: ignored

# ⏳ Wait for our async predictions to complete
Here's a helper to check in on our predictions. This usually takes a minute or two.

In [91]:
def all_done(predictions):
    """Return True when every prediction has status ``'succeeded'``.

    Args:
        predictions: Iterable of objects exposing a ``status`` attribute.

    Returns:
        ``True`` if no prediction has a status other than ``'succeeded'``. An
        empty collection counts as done, so a polling loop with nothing to wait
        for terminates instead of spinning forever.
    """
    return all(p.status == 'succeeded' for p in predictions)

In [92]:
# Flat list of every asynchronous prediction to wait for: the two list-valued
# outputs are concatenated and the three single predictions are wrapped in lists.
all_predictions = chain_output['video_predictions'] + \
                  chain_output['audio_predictions'] + \
                  [chain_output['ending_quote_prediction']] + \
                  [chain_output['title_audio_prediction']] + \
                  [chain_output['music_prediction']]

In [93]:
# Statuses from which a Replicate prediction never recovers. Waiting on them
# would loop forever, because all_done() only accepts 'succeeded'.
TERMINAL_FAILURES = {'failed', 'canceled'}

done = False

while not done:
  # Refresh every prediction first, then report, so the printed statuses all
  # belong to the same polling round.
  for p in all_predictions:
    p.reload()
  for p in all_predictions:
    print(f'https://replicate.com/p/{p.id}', p.status)

  broken = [p for p in all_predictions if p.status in TERMINAL_FAILURES]
  if broken:
    details = ", ".join(f"{p.id} ({p.status})" for p in broken)
    raise RuntimeError(f"Predictions did not succeed: {details}")

  done = all_done(all_predictions)
  if not done:
    time.sleep(2)
  output.clear()

print("Predictions complete")

Predictions complete


# 🪡 Stitch them all together!

In [94]:
# Collect the result locations from the finished predictions.
# Each image prediction's output is kept as-is here; the download loop later
# indexes it with [0] to get the URL.
video_urls = [v.output for v in video_predictions]
audio_urls = [a.output['audio_out'] for a in audio_predictions]
music_url = chain_output['music_prediction'].output['audio']
subtitles = split_script
title_image_url = chain_output['title_image']
title_audio_url = chain_output['title_audio_prediction'].output['audio_out']

video_urls


[['https://replicate.delivery/pbxt/PYUOekYn9jWDFqryg6LqArs4iWUIcx5ZwdHmuJdwF2ik6nmIA/out_0.png'],
 ['https://replicate.delivery/pbxt/NfvCcgJiKQUEYyNBwfWeDjoIoMeguJFU5MKaABzUjNcqUfpJC/out_0.png'],
 ['https://replicate.delivery/pbxt/kKhut0oVENo5FdCoxH9SL17HcNmIxVgcCwDlpEWNfZBl6nmIA/out_0.png'],
 ['https://replicate.delivery/pbxt/BYqMegWeTgt8JkIc3ZsWczSPrFArGYmTcTck9tlhIpDM1PNRA/out_0.png'],
 ['https://replicate.delivery/pbxt/xGoAfWBVuDX1JCTXFcgfEU74MggCwsKDWAZaSX9EM0fsqf0EB/out_0.png'],
 ['https://replicate.delivery/pbxt/mNBmhIhXfUQAOKlnjffHgi0c70Igtcv18ebymBys9Qf5qeTTE/out_0.png'],
 ['https://replicate.delivery/pbxt/we5mZiCc0pyrDa8MwN7CrFKnIfUVVMBdWIhAGRrPOEqZ1PNRA/out_0.png'],
 ['https://replicate.delivery/pbxt/xetrUOzsyjRjNCpTdGIJPEwiQcmoNVGrF2VPCN9SHiRt6nmIA/out_0.png'],
 ['https://replicate.delivery/pbxt/ePTFfnx3JIlt4kKRb4A1YBFT2fwJoHweKzh0iCyoEjuSWfpJC/out_0.png'],
 ['https://replicate.delivery/pbxt/pb4dF3kwj47lHJoe1iRJsLbhWQqPleivLeSYzTRp5DKJrf0EB/out_0.png'],
 ['https://replicate

In [95]:
## slow down the audio urls

import requests
import os

def slow_down_audio(input_file, output_file, slowdown_factor):
    """Re-time a WAV file by scaling its frame rate and save the result as MP3.

    The samples are left untouched; only the declared frame rate is multiplied
    by ``slowdown_factor`` (and truncated to an int). A factor below 1 therefore
    yields a lower frame rate for the same samples, i.e. slower playback.

    Args:
        input_file: Path of the WAV file to read.
        output_file: Path of the MP3 file to write.
        slowdown_factor: Positive multiplier applied to the frame rate.

    Raises:
        ValueError: If ``slowdown_factor`` is not greater than zero.
    """
    # Reject invalid factors before touching any file.
    if slowdown_factor <= 0:
        raise ValueError(f"slowdown_factor must be positive, got {slowdown_factor!r}")

    # Load the audio file
    audio = AudioSegment.from_wav(input_file)

    # Same raw data, reinterpreted at the scaled frame rate.
    slowed_audio = audio._spawn(audio.raw_data, overrides={
        "frame_rate": int(audio.frame_rate * slowdown_factor)
    })

    # Export the modified audio to a file
    slowed_audio.export(output_file, format="mp3")
    print("Slowdown complete. Output file:", output_file)

slowdown_factor = 0.96

In [96]:
# Download every narration clip, slow it down, and collect the MP3 paths.
audio_files = []

for i, url in enumerate(audio_urls):
    response = requests.get(url)
    # Fail here on an HTTP error instead of saving an error page as audio.
    response.raise_for_status()
    audio_filename = f"temp_audio{i}.wav"
    with open(audio_filename, "wb") as audio_file:
        audio_file.write(response.content)

    # Generate the output file name (same stem, .mp3 extension)
    output_file = audio_filename.replace(".wav", ".mp3")

    # Call the slow_down_audio function
    slow_down_audio(audio_filename, output_file, slowdown_factor)

    # Append the output file name to the array
    audio_files.append(output_file)

Slowdown complete. Output file: temp_audio0.mp3
Slowdown complete. Output file: temp_audio1.mp3
Slowdown complete. Output file: temp_audio2.mp3
Slowdown complete. Output file: temp_audio3.mp3
Slowdown complete. Output file: temp_audio4.mp3
Slowdown complete. Output file: temp_audio5.mp3
Slowdown complete. Output file: temp_audio6.mp3
Slowdown complete. Output file: temp_audio7.mp3
Slowdown complete. Output file: temp_audio8.mp3
Slowdown complete. Output file: temp_audio9.mp3
Slowdown complete. Output file: temp_audio10.mp3
Slowdown complete. Output file: temp_audio11.mp3
Slowdown complete. Output file: temp_audio12.mp3
Slowdown complete. Output file: temp_audio13.mp3
Slowdown complete. Output file: temp_audio14.mp3
Slowdown complete. Output file: temp_audio15.mp3


In [1]:
import requests
import os
import moviepy.editor as mp
import moviepy.video.fx.all as vfx
import textwrap
from moviepy.editor import *
from PIL import Image, ImageDraw, ImageFont
from io import BytesIO
import numpy as np


# Download video and audio files
video_files = []
audio_files = []
# for i, url in enumerate(video_urls):
#     response = requests.get(url)
#     video_filename = f"temp_video{i}.mp4"
#     with open(video_filename, "wb") as video_file:
#         video_file.write(response.content)
#     video_files.append(video_filename)
# Turn every generated still image into a one-frame video file.
fps = 12.0
for i, url in enumerate(video_urls):
    # Each entry of video_urls is indexed with [0] to get the image URL.
    response = requests.get(url[0])
    # Fail here on an HTTP error instead of handing an error page to PIL.
    response.raise_for_status()
    image = Image.open(BytesIO(response.content))
    image_np = np.array(image)
    clip = mp.ImageSequenceClip([image_np], fps=fps)
    video_filename = f"temp_video{i}.mp4"
    clip.write_videofile(video_filename, codec='libx264', fps=fps)
    video_files.append(video_filename)

# Download every narration clip, slow it down, and collect the MP3 paths.
for i, url in enumerate(audio_urls):
    response = requests.get(url)
    # Fail here on an HTTP error instead of saving an error page as audio.
    response.raise_for_status()
    audio_filename = f"temp_audio{i}.wav"
    with open(audio_filename, "wb") as audio_file:
        audio_file.write(response.content)

    # Generate the output file name (same stem, .mp3 extension)
    output_file = audio_filename.replace(".wav", ".mp3")

    # Call the slow_down_audio function
    slow_down_audio(audio_filename, output_file, slowdown_factor)

    # Append the output file name to the array
    audio_files.append(output_file)

# Load and process video and audio files

# Pair each narration clip with its image video and loop the image for as long
# as the narration lasts.
processed_videos = []
for i, audio_file in enumerate(audio_files):
    # When there are more narration clips than images, reuse the last image.
    # Clamping the index covers any surplus, not just a surplus of one.
    video_index = min(i, len(video_files) - 1)
    video = mp.VideoFileClip(video_files[video_index])
    audio = mp.AudioFileClip(audio_file)

    # Loop the video for the duration of the audio
    repeats = int(audio.duration // video.duration + 1)
    looped_video = mp.concatenate_videoclips([video] * repeats)

    # Set the audio of the video to the audio file
    video_with_audio = looped_video.set_audio(audio)
    processed_videos.append(video_with_audio)

# Concatenate all the processed videos
final_video = mp.concatenate_videoclips(processed_videos)

## The following adds the title image / narration to the video.
# Add this function to create the text image
def txt_image(img, txt, font_size, color):
    """Return a copy of ``img`` with ``txt`` drawn near its top-left corner.

    Args:
        img: Image object providing ``copy()`` that ``ImageDraw.Draw`` can draw on.
        txt: Text to draw at pixel offset (50, 50).
        font_size: Accepted for interface compatibility but not applied yet;
            no font is passed to ``draw.text``, so the default font is used.
        color: Fill colour for the text, passed straight to ``draw.text``.

    Returns:
        The annotated copy; ``img`` itself is not drawn on.
    """
    image = img.copy()
    draw = ImageDraw.Draw(image)
    # Honour the caller's colour instead of a hard-coded yellow.
    draw.text((50, 50), txt, fill=color)
    return image

# Download the title image
image_url = title_image_url
response = requests.get(image_url)
# Stop on an HTTP error instead of handing an error page to the image decoder.
response.raise_for_status()
img = Image.open(BytesIO(response.content))

# Resize to a fixed 1200x900 canvas. The clips built from this image are
# resized again to the story clips' frame size before concatenation.
img_resized = img.resize((1200, 900))

# Download the ending-quote narration
audio_url = chain_output['ending_quote_prediction'].output['audio_out']
response = requests.get(audio_url)
# Stop on an HTTP error rather than saving an error body as a .wav file.
response.raise_for_status()
audio_filename = "temp_audio_ending.wav"
with open(audio_filename, "wb") as audio_file:
    audio_file.write(response.content)

# Output name: same stem as the download, with an .mp3 extension.
output_file = audio_filename.replace(".wav", ".mp3")

# slow_down_audio is defined elsewhere in the notebook; it is given the
# downloaded file, the output path and the slowdown factor.
slow_down_audio(audio_filename, output_file, slowdown_factor)

# Load the processed narration, using the same `mp` alias as the scene narration clips.
audio_ending = mp.AudioFileClip(output_file)

# Title card: the resized title image with a text overlay, 4 seconds by default.
# NOTE(review): the literal string 'text' is drawn here, not the `text`
# variable assigned below -- confirm whether the real title (or an empty
# string) was intended. The original note says the overlay could not be made
# bigger; txt_image does not apply font_size.
text = chain_output['title']
img_text = ImageClip(np.asarray(txt_image(img_resized, txt='text', font_size=72, color="white")), duration=4)

# Ending card: show the still for exactly as long as the ending narration.
# Previously a looped clip was built and then overwritten by the 4-second
# card, so the card never matched the narration length.
img_text_audio_ending = img_text.set_duration(audio_ending.duration).set_audio(audio_ending)

# Download the title page audio file
audio_url = chain_output['title_audio_prediction'].output['audio_out']
response = requests.get(audio_url)
# Stop on an HTTP error rather than saving an error body as audio.
response.raise_for_status()
with open("temp_audio_title.mp3", "wb") as audio_file:
    audio_file.write(response.content)

# Load the title narration, using the same `mp` alias as the scene narration clips.
audio_beginning = mp.AudioFileClip("temp_audio_title.mp3")

# Title card: show the still for exactly as long as the title narration.
# Previously a looped clip was built and then overwritten by the 4-second
# card, so the card never matched the narration length.
img_text_audio_beginning = img_text.set_duration(audio_beginning.duration).set_audio(audio_beginning)

# Match the title/ending cards to the frame size of the first story clip.
width, height = processed_videos[0].size
title_video = img_text_audio_beginning.resize((width, height))
ending_video = img_text_audio_ending.resize((width, height))

# The title card is built but its insertion is currently disabled.
# processed_videos.insert(0, title_video)
processed_videos.append(ending_video)

# Rebuild the full video now that the ending card is part of the list.
# Qualified with `mp` like the other concatenate_videoclips calls in this cell.
final_video = mp.concatenate_videoclips(processed_videos)

# Download the background audio file
bg_audio_url = music_url
response = requests.get(bg_audio_url)
# Stop on an HTTP error rather than saving an error body as audio.
response.raise_for_status()
with open("temp_bg_audio.mp3", "wb") as audio_file:
    audio_file.write(response.content)

# Load the music, using the same `mp` alias as the scene narration clips.
bg_audio = mp.AudioFileClip("temp_bg_audio.mp3")

# Running time of the assembled video; the music is sized to it.
video_duration = final_video.duration

# Repeat the music for the whole running time, then apply volumex(0.3).
bg_audio_looped = bg_audio.fx(afx.audio_loop, duration=video_duration).volumex(0.3)

# Combine the video's own audio track with the music and attach the mix.
final_audio = CompositeAudioClip([final_video.audio, bg_audio_looped])
final_video_with_bg_audio = final_video.set_audio(final_audio)

# Render the result to disk.
final_video_with_bg_audio.write_videofile("how_to.mp4", codec="libx264", audio_codec="aac")

# Clean up the temporary scene videos. Iterate video_files directly: zipping
# with audio_files stops at the shorter list and can leave videos behind.
for video_file in video_files:
    os.remove(video_file)
# The narration files in audio_files are left on disk (their removal is disabled).

NameError: ignored

In [99]:
#@title Watch the video
from IPython.display import HTML
from base64 import b64encode

# Read the rendered file inside a context manager so the handle is closed.
with open('how_to.mp4', 'rb') as video_fh:
    mp4 = video_fh.read()

# Embed the bytes as a base64 data URL so the player needs no separate file.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [None]:
# Scratch cell: display the ending-quote narration value that the assembly
# cell passes to requests.get as the audio download URL.
chain_output['ending_quote_prediction'].output['audio_out']