# StoryBoard

<a href="https://colab.research.google.com/github/video-db/videodb-cookbook/blob/nb/storybook/examples/Storybook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup

### Installing the required packages

In [None]:
!pip install videodb openai

### API keys

Before proceeding, make sure you have API keys for [VideoDB](https://videodb.io), [OpenAI](https://openai.com), and [ElevenLabs](https://elevenlabs.io). If you don't, sign up for API access on the respective platforms.

> You can get VideoDB's API key from 👉🏼 [VideoDB Console](https://console.videodb.io). ( Free for first 50 uploads, **No credit card required!** )

In [95]:
import os

# Fill in your own credentials. ELEVEN_LABS_API_KEY is read explicitly by the
# voiceover cell; the other two are presumably picked up from the environment
# by the OpenAI and VideoDB clients.
os.environ.update(
    {
        "OPENAI_API_KEY": "",
        "ELEVEN_LABS_API_KEY": "",
        "VIDEO_DB_API_KEY": "",
    }
)

### ElevenLabs Voice ID

You will also need the ElevenLabs Voice ID of the voice you want to use.

Please add [this](https://elevenlabs.io/app/voice-lab/share/eea2654def1e6c5bda5b4ce8f99f8f2c857b71a15cd6188c27d337206ea98177/s6sJbrmNIsT6M7vdjjES) voice to your VoiceLab and copy its Voice ID from there.



In [96]:
# ElevenLabs voice used for the narration; it is inserted into the text-to-speech
# request URL. Replace it with the Voice ID copied from your own VoiceLab.
voiceover_artist_id = "LHgFk7RaIiyNE5I3pC1d"

## Define Your App

In [97]:
# One-line pitch of the app; it is interpolated into the OpenAI and DALL-E prompts.
app_description = "A meditation app for busy people with anxiety."
# Ordered titles of the user-journey steps; each one gets its own illustration
# and voiceover in the generated storyboard.
app_steps = [
    "Set up profile",
    "Select preference for theme & music",
    "Set meditation session timing",
    "Start the session"
]

In [98]:
# Wrap every step title in its own dict so later cells can attach the
# generated description, script, audio and image to it.
steps = [{'step': title} for title in app_steps]

## Assets Generation

### Steps -> Step Description

In [99]:
import openai
import json


# Ask the model for one description per journey step. The prompt is a fixed
# preamble, one bullet per step, and a final sentence describing the output format.
prompt = f"Generate a structured response for {app_description}. in the user journey. This description should capture the essence of the action performed by the user during this step. This application aims to {app_description}. Here are the steps involved in the user journey, Elaborate the each step and involved the specifc steps requird in the stage:"
prompt += "".join(
    f"\n- Create a concise description for the step '{step['step']}' in the user journey. This description should capture the essence of the action performed by the user during this step."
    for step in steps
)
# Descriptions are matched to steps by position below, so the prompt asks
# explicitly for exactly one string per step, in the original order.
prompt += (
    "\nReturn a response in JSON format, with key 'steps', and value being a list of strings, "
    f"containing exactly {len(steps)} step descriptions in the same order as the steps listed above."
)

# `client` is reused by the image and voiceover-script functions defined later.
client = openai.OpenAI()
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "system", "content": prompt}],
    response_format={"type": "json_object"}
)

openai_res = json.loads(completion.choices[0].message.content)

# Validate the shape before touching `steps`: a short list would silently leave
# steps without 'step_description' (failing later with a KeyError), and a long
# one would raise an IndexError half-way through the assignment.
step_descriptions = openai_res.get('steps') if isinstance(openai_res, dict) else None
if not isinstance(step_descriptions, list) or len(step_descriptions) != len(steps):
    raise ValueError(
        f"Expected a 'steps' list with {len(steps)} descriptions from OpenAI, got: {openai_res!r}"
    )

for step, step_description in zip(steps, step_descriptions):
    step['step_description'] = step_description

### DALL-E Image Generation Function

In [101]:
def generate_image_dalle(step, app_description):
    """Generate a DALL-E 3 illustration for one user-journey step.

    Args:
        step: Dict describing the step. ``step['step']`` is the step title and
            ``step['step_description']`` its longer description.
        app_description: Short description of the app, interpolated into the prompt.

    Returns:
        The URL of the generated image, or ``None`` if the request failed
        (the error is printed instead of raised).

    Raises:
        KeyError: If ``step`` lacks the ``'step'`` or ``'step_description'`` key.
    """
    # Name the stage by its title only. Interpolating the whole dict would put
    # its repr (including any voiceover script and filename already attached)
    # into the image prompt.
    step_title = step['step']
    consistent_part = "Create an illustration in a simple and minimalist, duotone style, pink and black, with pastel colours and a single central character of a woman with short hair."
    variable_part = f"This illustration is a part of a storyboard to explain the user journey of an app built for {app_description}. This image will portray the '{step_title}' stage in the app.Step description: {step['step_description']}. Now, use this information to create an illustration in the style of Hayao Miyazaki"
    prompt = f"{consistent_part}\n- {variable_part}"

    # Best effort by design: any failure is reported and turned into None so the
    # caller can carry on with the remaining steps.
    try:
        response = client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            n=1,
            size="1024x1024"
        )
        return response.data[0].url  # Assuming the API returns the image URL
    except Exception as e:
        print(f"An error occurred while generating the image: {e}")
        return None

### Voiceover: Create Voice Script - OpenAI

In [102]:
def generate_voiceover_script(step):
    """Ask the chat model for a short narration script for one journey step.

    Reads ``step['step_description']`` and returns the model's reply with
    surrounding whitespace stripped, or ``None`` (after printing the error)
    when the request fails.
    """
    instruction = (
        f"Create a conversational and engaging script for an app where the user is {step['step_description']}. "
        "Keep it narrative-driven, within two sentences."
    )
    system_message = {"role": "system", "content": instruction}

    try:
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[system_message],
        )
        script = completion.choices[0].message.content.strip()
    except Exception as e:
        print(f"An error occurred while generating the voiceover script: {e}")
        return None
    return script

### Voiceover: Create Voiceover Audio - ElevenLabs

In [103]:
import requests
def generate_voiceover_audio(script, file):
    """Synthesize ``script`` with ElevenLabs text-to-speech and save it to ``file``.

    Args:
        script: Text to be spoken.
        file: Path of the audio file to write (opened in binary mode).

    Raises:
        requests.HTTPError: If ElevenLabs answers with an error status. The
            output file is not opened in that case, so an error payload is
            never saved as if it were audio.
        requests.RequestException: On connection problems or a timeout.
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voiceover_artist_id}"
    headers = {
        "xi-api-key": os.environ.get("ELEVEN_LABS_API_KEY"),
        "Content-Type": "application/json"
    }
    payload = {
        "model_id": "eleven_monolingual_v1",
        "text": script,
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    CHUNK_SIZE = 1024
    # (connect, read) timeouts in seconds so a stalled request cannot hang the notebook.
    REQUEST_TIMEOUT = (10, 120)

    # stream=True lets iter_content() read the body incrementally instead of
    # buffering it first; the context manager releases the connection afterwards.
    with requests.request(
        "POST", url, json=payload, headers=headers, stream=True, timeout=REQUEST_TIMEOUT
    ) as elevenlabs_res:
        # Check the status before opening the file: error responses carry a
        # non-audio body that must not be written to the .mp3.
        elevenlabs_res.raise_for_status()

        # Save the audio file
        with open(file, 'wb') as f:
            for chunk in elevenlabs_res.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:
                    f.write(chunk)

### Putting Image Generation and Audio Generation Together

In [107]:
def process_user_journey(app_description, steps):
    """Generate the voiceover and illustration assets for every journey step.

    Each step dict in ``steps`` is updated in place: when a script is produced,
    ``'voiceover_script'`` and ``'voiceover_filename'`` are set and the audio is
    written to ``voiceover_<index>.mp3``; when an image is produced,
    ``'image_url'`` is set. Nothing is returned.
    """
    print("App Description:", app_description)

    for position, journey_step in enumerate(steps):
        print(f"\nProcessing step: {journey_step}")

        # Narration first: the script, then the audio file rendered from it.
        script = generate_voiceover_script(journey_step)
        if script:
            audio_path = f"voiceover_{position}.mp3"
            journey_step.update(voiceover_script=script, voiceover_filename=audio_path)
            generate_voiceover_audio(script, audio_path)

        # The illustration is attempted whether or not the narration succeeded.
        illustration_url = generate_image_dalle(journey_step, app_description)
        if illustration_url:
            journey_step['image_url'] = illustration_url

In [None]:
# Generate the voiceover script, audio file and illustration for every step;
# the results are stored on the dicts in `steps` in place.
process_user_journey(app_description, steps)

## Creating a Video with Generated Assets

### Setup VideoDB Connection

In [114]:
from videodb import connect

# Open the VideoDB connection. No key is passed here, so it presumably falls
# back to the VIDEO_DB_API_KEY environment variable set earlier — confirm.
conn = connect()
# Collection the generated assets are uploaded to (no id given — presumably
# the account's default collection).
coll = conn.get_collection()

### Upload Assets to VideoDB

In [None]:
from videodb import MediaType
# Fail early, naming the affected steps, when asset generation was skipped:
# process_user_journey leaves 'image_url' / 'voiceover_filename' unset if the
# image or the voiceover script could not be generated, which would otherwise
# surface here as a bare KeyError after some uploads had already happened.
incomplete_steps = [
    step['step']
    for step in steps
    if 'image_url' not in step or 'voiceover_filename' not in step
]
if incomplete_steps:
    raise ValueError(
        f"Missing generated image or voiceover for steps: {incomplete_steps}. "
        "Re-run the asset generation before uploading."
    )

# Upload each step's image (by URL) and voiceover (local file) and remember
# the ids of the stored media on the step.
for step in steps:
    image = coll.upload(url=step['image_url'], media_type=MediaType.image)
    audio = coll.upload(file_path=step['voiceover_filename'])
    step['image_id'] = image.id
    step['audio_id'] = audio.id

### Calculate Total duration of Video

In [121]:
# Length of the final video: the sum of the lengths of all uploaded voiceovers.
total_duration = sum(
    float(coll.get_audio(step['audio_id']).length) for step in steps
)

### Add a base video (not needed if image add_inline is available)

In [123]:
from videodb.timeline import Timeline
from videodb.asset import VideoAsset

# Base video that the images and voiceovers are laid over.
# NOTE(review): this video id is hard-coded; it presumably has to exist in the
# connected collection — replace it with a video from your own account.
base_vid = coll.get_video("m-5f974bc5-19b4-4e7f-a13b-1992d405917b")
# Limit the base video to the combined voiceover length (presumably `end` is
# expressed in the same unit as `audio.length` — confirm).
base_vid_aset = VideoAsset(base_vid.id, end=total_duration)
timeline = Timeline(conn)

# The base video is the only inline asset; the per-step images and audio are
# added as overlays in the next cell.
timeline.add_inline(base_vid_aset)

In [None]:
from videodb.asset import VideoAsset, ImageAsset, AudioAsset
from videodb import play_stream

# Running start offset on the timeline for the next step's overlays.
seeker = 0
for step in steps:
    audio = coll.get_audio(step['audio_id'])
    image = coll.get_image(step['image_id'])
    audio_duration = float(audio.length)

    # Show the step's image for exactly as long as its voiceover lasts.
    image_asset = ImageAsset(image.id, duration=audio_duration, x=100, y=0, width=1080 , height=720)
    audio_asset = AudioAsset(audio.id, disable_other_tracks=True)

    # Both overlays start at the same offset so narration and illustration begin together.
    timeline.add_overlay(seeker, audio_asset)
    timeline.add_overlay(seeker, image_asset)

    # The next step starts where this voiceover ends.
    seeker += audio_duration

# Build the stream for the assembled timeline and play it.
stream_url = timeline.generate_stream()
play_stream(stream_url)