In [1]:
from manual_influencer import constants, script_generator

# Generate the scene script from the text+PDF-to-scenes prompt defined in constants.
# The result is parsed with json.loads in the next cell, so it is expected to be a JSON string.
mjson = script_generator.generate_script(text=constants.TEXT_AND_PDF_TO_SCENES_PROMPT)

{
  "scenes": [
    {
      "visual": "Person hantiert mit einem Powerline Adapter",
      "audio": "Bevor Sie die TechniSat PowerLine Webcast 3 Adapter in Betrieb nehmen, sollten Sie prüfen ob das Gerät in die Steckdose gesteckt ist. Sollte sich Ihr Einsatz einer Mehrfachsteckdose nicht vermeiden lassen, testen Sie die Verbindung mit nahe beieinanderliegenden Steckdosen. Schliessen Sie beide Geräte wie im Vorfeld beschrieben an, danach müssen Sie das Gerät unbedingt am Versorgungsnetz betreiben! Helfen die beschriebenen Schritte nicht, wenden Sie sich an die Hotline."
    }
  ]
}

In [2]:
import json
# Parse the generated script. Later cells iterate data['scenes'] and read each
# scene's 'visual' and 'audio' entries.
data = json.loads(mjson)

In [3]:
from google import genai
from google.genai import types
import base64

def generate(prompt):
    """Ask Gemini on Vertex AI for an English image description of a scene.

    The request is streamed; every text chunk is echoed to stdout as it
    arrives and the chunks are concatenated into the return value.

    Args:
        prompt: Scene description sent as the user message.

    Returns:
        str: The concatenated text of all streamed chunks (empty string if
        the model produced no text).
    """
    client = genai.Client(
        vertexai=True,
        project="bliss-hack25fra-9587",
        location="global",
    )

    # System instruction that constrains the style and format of the answer.
    si_text1 = """make an english description of an image matching this scene description. black and white, rough sketch, cartoonish style. only answer with the description. no other text."""

    model = "gemini-2.0-flash-001"
    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_text(text=prompt)
            ]
        ),
    ]
    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        max_output_tokens=8192,
        response_modalities=["TEXT"],
        # All four harm categories are explicitly switched off.
        safety_settings=[
            types.SafetySetting(
                category="HARM_CATEGORY_HATE_SPEECH",
                threshold="OFF"
            ),
            types.SafetySetting(
                category="HARM_CATEGORY_DANGEROUS_CONTENT",
                threshold="OFF"
            ),
            types.SafetySetting(
                category="HARM_CATEGORY_SEXUALLY_EXPLICIT",
                threshold="OFF"
            ),
            types.SafetySetting(
                category="HARM_CATEGORY_HARASSMENT",
                threshold="OFF"
            ),
        ],
        system_instruction=[types.Part.from_text(text=si_text1)],
    )

    pieces = []

    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        text = chunk.text
        # A streamed chunk may carry no text (its .text is then None).
        # Skip it instead of printing "None" or failing on concatenation.
        if not text:
            continue
        print(text, end="")
        pieces.append(text)

    return "".join(pieces)
    

#generate()

In [4]:
from vertexai.preview.vision_models import ImageGenerationModel
import vertexai

#pmt = "A close-up, roughly sketched, black and white cartoon drawing of a powerline adapter, suggesting a simple, perhaps technical illustration."
def generate_image(prompt, filename):
    """Generate an image with Imagen on Vertex AI and save it to ``filename``.

    Args:
        prompt: Text prompt for the image model; it is also echoed to stdout.
        filename: Path the first generated image is saved to.

    Raises:
        IndexError: If the model response contains no images.
    """
    print("Prompt: " + prompt)
    vertexai.init(project="bliss-hack25fra-9587", location="us-central1")
    generation_model = ImageGenerationModel.from_pretrained("imagen-3.0-generate-002")

    # NOTE(review): person_generation and safety_filter_level are passed as
    # empty strings; confirm against the SDK that this selects the defaults.
    images = generation_model.generate_images(
        prompt=prompt,
        number_of_images=4,
        aspect_ratio="9:16",
        negative_prompt="",
        person_generation="",
        safety_filter_level="",
        add_watermark=True,
    )

    # Four candidates are requested but only the first one is kept.
    try:
        first_image = images[0]
    except IndexError as err:
        # Same exception type as before, but the message now says what went wrong.
        raise IndexError(
            f"image generation returned no images for prompt: {prompt!r}"
        ) from err

    first_image.save(filename)

In [5]:
import cv2
import os
import subprocess
import imageio_ffmpeg
import ffmpeg

def _remove_if_present(path):
    """Delete ``path`` if it exists; a missing file is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def generate_video(image_folder = 'data/images', audio_file = './data/synthesis.wav', output_file = './data/out.mp4'):
    """Build a slideshow video from the PNGs in a folder and add an audio track.

    Every ``.png`` in ``image_folder`` (in sorted filename order) becomes one
    frame of an intermediate ``./video.avi`` written at 0.25 fps, i.e. each
    image is shown for 4 seconds. ffmpeg then re-encodes that file to H.264
    (``./video.mp4``) and muxes it with ``audio_file`` into ``output_file``,
    ending at the shorter of the two streams. The intermediate files are
    removed afterwards, also when a step fails.

    Args:
        image_folder: Directory containing the ``.png`` frames.
        audio_file: Audio file to mux onto the slideshow.
        output_file: Path of the resulting video.

    Raises:
        ValueError: If ``image_folder`` contains no ``.png`` file or the first
            image cannot be read.
        subprocess.CalledProcessError: If an ffmpeg invocation exits non-zero.
    """
    avi_path = './video.avi'
    mp4_path = './video.mp4'

    # Remove leftovers one by one so that a missing file does not prevent the
    # remaining ones from being deleted.
    for stale in ('./finished_video.mp4', avi_path, mp4_path):
        _remove_if_present(stale)

    # os.listdir returns names in arbitrary order; sort for a stable frame order.
    images = sorted(img for img in os.listdir(image_folder) if img.endswith(".png"))
    if not images:
        raise ValueError(f"no .png images found in {image_folder!r}")

    frame = cv2.imread(os.path.join(image_folder, images[0]))
    if frame is None:
        raise ValueError(f"could not read image {images[0]!r} in {image_folder!r}")
    # The first image determines the frame size of the whole video.
    height, width = frame.shape[:2]

    try:
        video = cv2.VideoWriter(avi_path, 0, 0.25, (width, height))
        try:
            for image in images:
                video.write(cv2.imread(os.path.join(image_folder, image)))
        finally:
            video.release()

        ffmpeg_exe = imageio_ffmpeg.get_ffmpeg_exe()

        # Step 1: re-encode the intermediate AVI to H.264.
        subprocess.run([
            ffmpeg_exe,
            '-y',               # overwrite output if exists
            '-i', 'video.avi',  # input video
            '-c:v', 'libx264',  # video codec
            '-c:a', 'aac',      # audio codec
            '-shortest',        # finish when the shorter stream ends
            'video.mp4'         # output file
        ], check=True)

        # Step 2: copy the video stream and add the audio track.
        subprocess.run([
            ffmpeg_exe,
            '-y',
            '-i', 'video.mp4',
            '-i', audio_file,
            '-c:v', 'copy',
            '-c:a', 'aac',
            '-shortest',
            output_file
        ], check=True)
    finally:
        # Always clean up, without masking the original error if a step failed
        # before its output file was created.
        _remove_if_present(avi_path)
        _remove_if_present(mp4_path)

In [6]:
import subprocess
import json
import wave
import contextlib
from mutagen.mp3 import MP3

def get_audio_duration(filepath):
    """Print and return the length reported by mutagen for the MP3 at ``filepath``."""
    duration = MP3(filepath).info.length
    print(duration)
    return duration

In [7]:
import ffmpeg
import imageio_ffmpeg
import os

def concat_videos(video_paths, output_path='concatenated.mp4'):
    """Concatenate videos, in the given order, using ffmpeg's concat demuxer.

    A temporary ``concat_list.txt`` listing the absolute input paths is
    written to the working directory, ffmpeg stream-copies the inputs into
    ``output_path`` (overwriting it if it exists), and the list file is
    removed again, even if ffmpeg fails.

    Args:
        video_paths: Iterable of paths (str or path-like) to concatenate.
        output_path: Path of the concatenated video.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    list_file = 'concat_list.txt'

    # Create the concat input text file
    with open(list_file, 'w', encoding='utf-8') as f:
        for path in video_paths:
            # Inside a single-quoted entry a literal quote must be written as '\''.
            escaped = os.path.abspath(path).replace("'", "'\\''")
            f.write(f"file '{escaped}'\n")

    try:
        ffmpeg_exe = imageio_ffmpeg.get_ffmpeg_exe()

        # Run ffmpeg concat
        subprocess.run([
            ffmpeg_exe,
            '-y',            # overwrite the output instead of prompting
            '-f', 'concat',
            '-safe', '0',    # the list contains absolute paths
            '-i', list_file,
            '-c', 'copy',
            output_path
        ], check=True)
    finally:
        os.remove(list_file)

In [None]:
from manual_influencer.tts import synthesize_speech
import shutil
from pathlib import Path
import os

# Each image is shown for this many seconds (generate_video writes at 0.25 fps).
SECONDS_PER_IMAGE = 4

folder = './data/visual'
audio_path = './data/audio/output.mp3'


def _clear_folder(path):
    """Delete every regular file directly inside ``path``."""
    for name in os.listdir(path):
        candidate = os.path.join(path, name)
        if os.path.isfile(candidate):
            os.remove(candidate)


# Make sure the working folders exist before listing / writing into them.
os.makedirs(folder, exist_ok=True)
os.makedirs(os.path.dirname(audio_path), exist_ok=True)
_clear_folder(folder)

# Remove videos left over from earlier runs (this also covers ./concatenated.mp4).
# Best effort: report files that cannot be removed instead of hiding the error.
for mp4_file in Path('.').glob('*.mp4'):
    try:
        os.remove(mp4_file)
    except OSError as err:
        print(f"Could not remove {mp4_file}: {err}")

for idx, scene in enumerate(data['scenes']):
    print(scene)
    visual = "A roughly sketched, cartoonish black and white image with no text; "
    visual += generate(scene['visual'])
    synthesize_speech(text=scene['audio'], output_file=audio_path)
    duration = get_audio_duration(audio_path)

    # floor(duration / SECONDS_PER_IMAGE) + 1 images make the slideshow at least
    # as long as the narration (generate_video trims it with -shortest), and
    # clips shorter than one slot still get one image.
    image_count = int(duration / SECONDS_PER_IMAGE) + 1
    for i in range(1, image_count + 1):
        # Zero-padded index keeps the files in order when sorted by name.
        generate_image(visual, os.path.join(folder, f'images_{i:03d}.png'))

    generate_video(image_folder=folder, audio_file=audio_path, output_file=str(idx) + '.mp4')

    # Start the next scene with an empty image folder.
    _clear_folder(folder)

{'visual': 'Person hantiert mit einem Powerline Adapter', 'audio': 'Bevor Sie die TechniSat PowerLine Webcast 3 Adapter in Betrieb nehmen, sollten Sie prüfen ob das Gerät in die Steckdose gesteckt ist. Sollte sich Ihr Einsatz einer Mehrfachsteckdose nicht vermeiden lassen, testen Sie die Verbindung mit nahe beieinanderliegenden Steckdosen. Schliessen Sie beide Geräte wie im Vorfeld beschrieben an, danach müssen Sie das Gerät unbedingt am Versorgungsnetz betreiben! Helfen die beschriebenen Schritte nicht, wenden Sie sich an die Hotline.'}
A simple, cartoonish sketch of a person fiddling with a powerline adapter.
Audio content written to file "./data/audio/output.mp3"
28.392
Prompt: A roughly sketched, cartoonish black and white image; A simple, cartoonish sketch of a person fiddling with a powerline adapter.

Prompt: A roughly sketched, cartoonish black and white image; A simple, cartoonish sketch of a person fiddling with a powerline adapter.

Prompt: A roughly sketched, cartoonish bla

In [None]:
from pathlib import Path

# Collect the per-scene videos (written as "<index>.mp4") in numeric scene order.
# Path.glob yields files in arbitrary order, and a plain '*.mp4' match would also
# pick up concatenated.mp4 from a previous run and feed it back in as an input.
ml = sorted(
    (mp4_file for mp4_file in Path('.').glob('*.mp4') if mp4_file.stem.isdecimal()),
    key=lambda mp4_file: int(mp4_file.stem),
)

concat_videos(ml)