In [24]:
from manual_influencer import constants, script_generator

# Generate the scene script from the TEXT_AND_PDF_TO_SCENES_PROMPT prompt.
# The returned text is parsed with json.loads in the next cell.
mjson = script_generator.generate_script(text=constants.TEXT_AND_PDF_TO_SCENES_PROMPT)

{
  "scenes": [
    {
      "visual": "Nahaufnahme eines Powerline Webcast 3 Adapters",
      "audio": "Bevor Sie die TechniSat PowerLine Webcast 3 Adapter in Betrieb nehmen, stellen Sie sicher, dass Sie ihn ausschliesslich an einem trockenen Standort verwenden."
    },
    {
      "visual": "Jemand versucht, einen Stift in eine Steckdose zu stecken",
      "audio": "Stecken Sie keine Gegenstände in die Öffnungen des Gerätes, denn es besteht Stromschlaggefahr!"
    },
    {
      "visual": "Bild eines Gerätes in direkter Sonneneinstrahlung",
      "audio": "Setzen Sie das Gerät nicht direkter Sonneneinstrahlung aus, da dies zu Beschädigungen des Gehäuses führen kann."
    },
    {
      "visual": "Jemand schliesst mehrere Geräte über eine Mehrfachsteckdose an den Powerline Webcast 3 an",
      "audio": "Wenn Sie weitere Netzwerkgeräte mittels Mehrfachsteckdose anschliessen, stecken Sie diese Mehrfachsteckdose in die Steckdose des PowerLine Webcast 3. Leuchtet die Power LED dauerhaft, i

In [25]:
import json
# Parse the generated script text. The scene loop further down iterates
# data['scenes'] and reads each scene's 'visual' and 'audio' entries.
data = json.loads(mjson)

In [29]:
from google import genai
from google.genai import types
import base64

def generate(prompt):
  """Turn a scene description into an English image description via Gemini.

  Sends ``prompt`` as the user message to ``gemini-2.0-flash-001`` on Vertex AI,
  with a system instruction asking for a black-and-white, rough, cartoonish
  image description. The reply is streamed: each text chunk is echoed to
  stdout as it arrives, and the concatenation of all chunks is returned.

  Args:
    prompt: Scene description text sent as the user message.

  Returns:
    The full response text; an empty string if no chunk carried any text.
  """
  client = genai.Client(
      vertexai=True,
      project="bliss-hack25fra-9587",
      location="global",
  )

  si_text1 = """make an english description of an image matching this scene description. black and white, rough sketch, cartoonish style. only answer with the description. no other text."""

  model = "gemini-2.0-flash-001"
  contents = [
    types.Content(
      role="user",
      parts=[
        types.Part.from_text(text=prompt)
      ]
    ),
  ]

  # Every harm category is configured with the same "OFF" threshold.
  harm_categories = (
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_HARASSMENT",
  )
  generate_content_config = types.GenerateContentConfig(
    temperature = 1,
    top_p = 0.95,
    max_output_tokens = 8192,
    response_modalities = ["TEXT"],
    safety_settings = [
      types.SafetySetting(category=category, threshold="OFF")
      for category in harm_categories
    ],
    system_instruction=[types.Part.from_text(text=si_text1)],
  )

  pieces = []
  for chunk in client.models.generate_content_stream(
    model = model,
    contents = contents,
    config = generate_content_config,
    ):
    # A streamed chunk without text parts exposes text=None; concatenating it
    # would raise TypeError, so such chunks are skipped.
    text = chunk.text
    if not text:
      continue
    print(text, end="")
    pieces.append(text)

  return "".join(pieces)
    

#generate()

In [27]:
from vertexai.preview.vision_models import ImageGenerationModel
import vertexai

#pmt = "A close-up, roughly sketched, black and white cartoon drawing of a powerline adapter, suggesting a simple, perhaps technical illustration."
def generate_image(prompt, filename):
    """Generate an image for ``prompt`` with Imagen and save it to ``filename``.

    Prints the prompt, initialises Vertex AI for the project, requests images
    in 9:16 aspect ratio from ``imagen-3.0-generate-002`` and saves the first
    returned image.

    Args:
        prompt: Text prompt describing the image (must be a ``str``; it is
            concatenated into the log line).
        filename: Path the first generated image is saved to.

    Raises:
        RuntimeError: If the service returned no images, so there is nothing
            to save.
    """
    print("Prompt: " + prompt)
    vertexai.init(project="bliss-hack25fra-9587", location="us-central1")
    generation_model = ImageGenerationModel.from_pretrained("imagen-3.0-generate-002")

    # NOTE(review): four images are requested but only the first one is saved;
    # confirm whether number_of_images could be lowered to 1.
    response = generation_model.generate_images(
        prompt=prompt,
        number_of_images=4,
        aspect_ratio="9:16",
        negative_prompt="",
        person_generation="",
        safety_filter_level="",
        add_watermark=True,
    )

    try:
        first_image = response[0]
    except IndexError as err:
        # An empty response would otherwise surface as a context-free
        # IndexError; report which prompt and target file were affected.
        raise RuntimeError(
            f"image generation returned no images for prompt {prompt!r}; "
            f"nothing to save to {filename!r}"
        ) from err

    first_image.save(filename)

In [41]:
import cv2
import os
import subprocess
import imageio_ffmpeg
import ffmpeg

def generate_video(image_folder = 'data/images', audio_file = './data/synthesis.wav', output_file = './data/out.mp4'):
    """Build a slideshow video from a folder of PNGs and add an audio track.

    Steps:
      1. Every ``*.png`` in ``image_folder`` (sorted by file name) is written
         as one frame of ``./video.avi`` at 1 frame per second. The frame size
         is taken from the first image.
      2. ffmpeg re-encodes that file with libx264 into ``./video.mp4``.
      3. ffmpeg combines ``./video.mp4`` (video stream copied) with
         ``audio_file`` (encoded as AAC) into ``output_file``; ``-shortest``
         ends the output when the shorter stream ends.

    The intermediate ``./video.avi`` and ``./video.mp4`` live in the current
    working directory and are removed afterwards, even if a step fails.

    Args:
        image_folder: Directory scanned (non-recursively) for ``.png`` files.
        audio_file: Audio input handed to ffmpeg.
        output_file: Path of the final video; overwritten if it exists.

    Raises:
        ValueError: If ``image_folder`` contains no ``.png`` file or an image
            cannot be decoded.
        subprocess.CalledProcessError: If an ffmpeg invocation exits non-zero.
    """
    raw_video = './video.avi'
    encoded_video = './video.mp4'

    def _remove_if_present(path):
        # A missing file is the expected case; any other failure propagates.
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    # Each leftover is removed independently, so one missing file does not
    # leave the others behind.
    for stale in ('./finished_video.mp4', raw_video, encoded_video):
        _remove_if_present(stale)

    # os.listdir returns names in arbitrary order; sort for a stable frame order.
    image_names = sorted(name for name in os.listdir(image_folder) if name.endswith(".png"))
    if not image_names:
        raise ValueError(f"no .png images found in {image_folder!r}")

    def _read_frame(name):
        frame = cv2.imread(os.path.join(image_folder, name))
        if frame is None:
            # cv2.imread signals an unreadable image by returning None.
            raise ValueError(f"could not read image {os.path.join(image_folder, name)!r}")
        return frame

    height, width = _read_frame(image_names[0]).shape[:2]

    try:
        # fourcc 0 at 1 fps: each image is shown for one second.
        # NOTE(review): frames whose size differs from the first image are
        # presumably not written correctly by VideoWriter - confirm.
        video = cv2.VideoWriter(raw_video, 0, 1, (width, height))
        try:
            for name in image_names:
                video.write(_read_frame(name))
        finally:
            video.release()

        ffmpeg_exe = imageio_ffmpeg.get_ffmpeg_exe()
        subprocess.run([
            ffmpeg_exe,
            '-y',               # overwrite output if exists
            '-i', raw_video,    # input video
            '-c:v', 'libx264',  # video codec
            '-c:a', 'aac',      # audio codec
            '-shortest',        # finish when the shorter stream ends
            encoded_video       # output file
        ], check=True)

        subprocess.run([
            ffmpeg_exe,
            '-y',
            '-i', encoded_video,
            '-i', audio_file,
            '-c:v', 'copy',
            '-c:a', 'aac',
            '-shortest',
            output_file
        ], check=True)
    finally:
        for temp in (raw_video, encoded_video):
            _remove_if_present(temp)

In [43]:
from manual_influencer.tts import synthesize_speech

images = []
idx = 0

# One clip per scene: sketch prompt -> four frames -> narration -> video.
for scene in data['scenes']:
    print(scene)

    # Turn the scene's 'visual' text into an English sketch description.
    visual = generate(scene['visual'])

    # The same four frame files are overwritten for every scene, each from
    # its own image-generation call with the same prompt.
    for frame_path in (
        './data/visual/images_001.png',
        './data/visual/images_002.png',
        './data/visual/images_003.png',
        './data/visual/images_004.png',
    ):
        generate_image(visual, frame_path)

    # The narration is likewise written to a single reused file.
    synthesize_speech(text=scene['audio'], output_file='./data/audio/output.mp3')

    # The clip is named after the running scene index: 0.mp4, 1.mp4, ...
    generate_video(image_folder='./data/visual', audio_file='./data/audio/output.mp3', output_file = str(idx) + '.mp4')
    idx += 1

{'visual': 'Nahaufnahme eines Powerline Webcast 3 Adapters', 'audio': 'Bevor Sie die TechniSat PowerLine Webcast 3 Adapter in Betrieb nehmen, stellen Sie sicher, dass Sie ihn ausschliesslich an einem trockenen Standort verwenden.'}
A close-up, roughly sketched cartoon of a Powerline Webcast 3 Adapter in black and white.
Prompt: A close-up, roughly sketched cartoon of a Powerline Webcast 3 Adapter in black and white.

Audio content written to file "./data/audio/output.mp3"
{'visual': 'Jemand versucht, einen Stift in eine Steckdose zu stecken', 'audio': 'Stecken Sie keine Gegenstände in die Öffnungen des Gerätes, denn es besteht Stromschlaggefahr!'}


ffmpeg version 7.0.2-static https://johnvansickle.com/ffmpeg/  Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 8 (Debian 8.3.0-6)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-debug --disable-ffplay --disable-indev=sndio --disable-outdev=sndio --cc=gcc --enable-fontconfig --enable-frei0r --enable-gnutls --enable-gmp --enable-libgme --enable-gray --enable-libaom --enable-libfribidi --enable-libass --enable-libvmaf --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librubberband --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libvorbis --enable-libopus --enable-libtheora --enable-libvidstab --enable-libvo-amrwbenc --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libdav1d --enable-libxvid --enable-libzvbi --enable-libzimg
  libavutil      59.  8.100 / 59.  8.100
  libavcodec     61.  3.100 / 61.  3.100
  libavformat   

A figure, rendered in a rough, cartoonish style, attempts to insert a pen into an electrical outlet.
Prompt: A figure, rendered in a rough, cartoonish style, attempts to insert a pen into an electrical outlet.

Audio content written to file "./data/audio/output.mp3"


ffmpeg version 7.0.2-static https://johnvansickle.com/ffmpeg/  Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 8 (Debian 8.3.0-6)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-debug --disable-ffplay --disable-indev=sndio --disable-outdev=sndio --cc=gcc --enable-fontconfig --enable-frei0r --enable-gnutls --enable-gmp --enable-libgme --enable-gray --enable-libaom --enable-libfribidi --enable-libass --enable-libvmaf --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librubberband --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libvorbis --enable-libopus --enable-libtheora --enable-libvidstab --enable-libvo-amrwbenc --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libdav1d --enable-libxvid --enable-libzvbi --enable-libzimg
  libavutil      59.  8.100 / 59.  8.100
  libavcodec     61.  3.100 / 61.  3.100
  libavformat   

{'visual': 'Bild eines Gerätes in direkter Sonneneinstrahlung', 'audio': 'Setzen Sie das Gerät nicht direkter Sonneneinstrahlung aus, da dies zu Beschädigungen des Gehäuses führen kann.'}
A cartoonish, roughly sketched, black and white image of a device sitting in direct sunlight.
Prompt: A cartoonish, roughly sketched, black and white image of a device sitting in direct sunlight.

Audio content written to file "./data/audio/output.mp3"


ffmpeg version 7.0.2-static https://johnvansickle.com/ffmpeg/  Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 8 (Debian 8.3.0-6)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-debug --disable-ffplay --disable-indev=sndio --disable-outdev=sndio --cc=gcc --enable-fontconfig --enable-frei0r --enable-gnutls --enable-gmp --enable-libgme --enable-gray --enable-libaom --enable-libfribidi --enable-libass --enable-libvmaf --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librubberband --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libvorbis --enable-libopus --enable-libtheora --enable-libvidstab --enable-libvo-amrwbenc --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libdav1d --enable-libxvid --enable-libzvbi --enable-libzimg
  libavutil      59.  8.100 / 59.  8.100
  libavcodec     61.  3.100 / 61.  3.100
  libavformat   

{'visual': 'Jemand schliesst mehrere Geräte über eine Mehrfachsteckdose an den Powerline Webcast 3 an', 'audio': 'Wenn Sie weitere Netzwerkgeräte mittels Mehrfachsteckdose anschliessen, stecken Sie diese Mehrfachsteckdose in die Steckdose des PowerLine Webcast 3. Leuchtet die Power LED dauerhaft, ist der Reset abgeschlossen. '}
A rough, cartoonish sketch in black and white depicts a power strip connected to a "Powerline Webcast 3" device, with multiple electronic devices plugged into the power strip.
Prompt: A rough, cartoonish sketch in black and white depicts a power strip connected to a "Powerline Webcast 3" device, with multiple electronic devices plugged into the power strip.

Audio content written to file "./data/audio/output.mp3"


ffmpeg version 7.0.2-static https://johnvansickle.com/ffmpeg/  Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 8 (Debian 8.3.0-6)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-debug --disable-ffplay --disable-indev=sndio --disable-outdev=sndio --cc=gcc --enable-fontconfig --enable-frei0r --enable-gnutls --enable-gmp --enable-libgme --enable-gray --enable-libaom --enable-libfribidi --enable-libass --enable-libvmaf --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librubberband --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libvorbis --enable-libopus --enable-libtheora --enable-libvidstab --enable-libvo-amrwbenc --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libdav1d --enable-libxvid --enable-libzvbi --enable-libzimg
  libavutil      59.  8.100 / 59.  8.100
  libavcodec     61.  3.100 / 61.  3.100
  libavformat   