In [1]:
from manual_influencer import constants, script_generator

# Ask the project's script generator to turn the scenes prompt into a script.
# The result is parsed with json.loads in the next cell, so it is expected to
# be a JSON string (presumably the {"scenes": [...]} text shown in the cell
# output -- TODO confirm that generate_script prints what it returns).
mjson = script_generator.generate_script(text=constants.TEXT_AND_PDF_TO_SCENES_PROMPT)

{
  "scenes": [
    {
      "visual": "PowerLine Webcast 3 Adapter in Steckdose stecken.",
      "audio": "Bevor du das Gerät in Betrieb nimmst, check die Lieferung auf Vollständigkeit, sonst gibt es am Ende noch einen Schadensfall. Dann schliesse den TechniSat PowerLine Webcast 3 mit dem Netzwerkkabel an deinen Netzwerkanschluss an, um deine Privatsphäre im Netzwerk zu schützen. Achte darauf, das Gerät nicht in direkter Nähe eines Heizkörpers aufzustellen!"
    }
  ]
}

In [2]:
import json
# Parse the generated script text into Python objects; the rendering cell at
# the end of the notebook iterates over data['scenes'].
data = json.loads(mjson)

In [3]:
from google import genai
from google.genai import types
import base64

def generate(prompt):
  """Turn a scene description into an English image-generation prompt.

  Sends ``prompt`` to the Gemini model "gemini-2.0-flash-001" through Vertex AI
  with a system instruction asking for a black-and-white, rough, cartoonish
  sketch description. The answer is streamed: every text chunk is echoed to
  stdout as it arrives and the concatenation of all chunks is returned.

  Args:
    prompt: Scene description to be rewritten as an image description.

  Returns:
    The full model answer as a single string (empty if no text was streamed).
  """
  client = genai.Client(
      vertexai=True,
      project="bliss-hack25fra-9587",
      location="global",
  )

  system_instruction_text = """make an english description of an image matching this scene description. black and white, rough sketch, cartoonish style. only answer with the description. no other text."""

  model = "gemini-2.0-flash-001"
  contents = [
    types.Content(
      role="user",
      parts=[types.Part.from_text(text=prompt)],
    ),
  ]

  # All four harm categories are explicitly switched off, in the same order as
  # before.
  harm_categories = (
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_HARASSMENT",
  )
  generate_content_config = types.GenerateContentConfig(
    temperature=1,
    top_p=0.95,
    max_output_tokens=8192,
    response_modalities=["TEXT"],
    safety_settings=[
      types.SafetySetting(category=category, threshold="OFF")
      for category in harm_categories
    ],
    system_instruction=[types.Part.from_text(text=system_instruction_text)],
  )

  pieces = []
  for chunk in client.models.generate_content_stream(
    model=model,
    contents=contents,
    config=generate_content_config,
  ):
    text = chunk.text
    # A streamed chunk may carry no text at all (its .text is then None, e.g.
    # a chunk holding only metadata). Skipping it avoids both printing the
    # literal "None" and a TypeError when concatenating.
    if not text:
      continue
    print(text, end="")
    pieces.append(text)

  return "".join(pieces)
    

#generate()

In [4]:
from vertexai.preview.vision_models import ImageGenerationModel
import vertexai

#pmt = "A close-up, roughly sketched, black and white cartoon drawing of a powerline adapter, suggesting a simple, perhaps technical illustration."
def generate_image(prompt, filename):
    """Generate an image for ``prompt`` with Imagen and save it to ``filename``.

    Initialises Vertex AI for project "bliss-hack25fra-9587" in "us-central1",
    loads the "imagen-3.0-generate-002" model, requests four watermarked 9:16
    images and saves the first one returned.

    Args:
        prompt: Text description of the image to generate.
        filename: Path the first generated image is written to. Missing parent
            directories are created.

    Raises:
        RuntimeError: If the service returns no image at all.
    """
    import os  # local import: this cell does not import os itself

    print("Prompt: " + prompt)
    vertexai.init(project="bliss-hack25fra-9587", location="us-central1")
    generation_model = ImageGenerationModel.from_pretrained("imagen-3.0-generate-002")

    response = generation_model.generate_images(
        prompt=prompt,
        # NOTE(review): four images are requested but only the first is kept;
        # presumably the extras raise the odds that at least one is returned.
        number_of_images=4,
        aspect_ratio="9:16",
        # Empty strings are passed through unchanged from the original call;
        # presumably the SDK treats them as "use the default" -- TODO confirm.
        negative_prompt="",
        person_generation="",
        safety_filter_level="",
        add_watermark=True,
    )

    # The response can be empty (for example when every candidate is filtered
    # out); report that clearly instead of failing with a bare IndexError.
    generated = list(response)
    if not generated:
        raise RuntimeError("Image generation returned no images for prompt: " + prompt)

    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    generated[0].save(filename)

In [5]:
import cv2
import os
import subprocess
import imageio_ffmpeg
import ffmpeg

def _remove_if_present(path):
    """Delete ``path``, ignoring the case where it does not exist."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def generate_video(image_folder='data/images', audio_file='./data/synthesis.wav',
                   output_file='./data/out.mp4', seconds_per_image=4.0):
    """Build a narrated slideshow video from the PNG files in a folder.

    The PNG images of ``image_folder`` are written, in sorted file-name order,
    to an intermediate './video.avi' in which each image is one frame. That
    file is re-encoded to H.264 ('./video.mp4') and then muxed with
    ``audio_file`` into ``output_file``; ffmpeg's ``-shortest`` flag ends the
    result when the shorter of the two streams ends. Both intermediate files
    are created in the current working directory and removed afterwards, even
    when a step fails.

    Args:
        image_folder: Directory containing the ``.png`` frames.
        audio_file: Audio track to combine with the slideshow.
        output_file: Path of the final video.
        seconds_per_image: How long each image is shown. The default of 4.0
            yields the previously hard-coded frame rate of 0.25 fps.

    Raises:
        ValueError: If ``seconds_per_image`` is not positive, the folder holds
            no ``.png`` file, or an image cannot be read.
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
    """
    if seconds_per_image <= 0:
        raise ValueError("seconds_per_image must be positive")

    avi_path = './video.avi'
    mp4_path = './video.mp4'

    # Remove leftovers of an earlier run. Each file is handled on its own so a
    # missing first file no longer prevents the others from being removed.
    for stale_path in ('./finished_video.mp4', avi_path, mp4_path):
        _remove_if_present(stale_path)

    # os.listdir returns names in arbitrary order; sort for a stable sequence.
    image_names = sorted(
        name for name in os.listdir(image_folder) if name.endswith(".png")
    )
    if not image_names:
        raise ValueError("No .png images found in " + repr(image_folder))

    try:
        writer = None
        try:
            for name in image_names:
                image_path = os.path.join(image_folder, name)
                frame = cv2.imread(image_path)
                # cv2.imread signals an unreadable file by returning None.
                if frame is None:
                    raise ValueError("Could not read image " + repr(image_path))
                if writer is None:
                    # Frame size is taken from the first image, as before.
                    height, width = frame.shape[:2]
                    writer = cv2.VideoWriter(
                        avi_path, 0, 1.0 / seconds_per_image, (width, height)
                    )
                writer.write(frame)
        finally:
            if writer is not None:
                writer.release()

        ffmpeg_exe = imageio_ffmpeg.get_ffmpeg_exe()

        # Step 1: re-encode the raw slideshow to H.264.
        subprocess.run([
            ffmpeg_exe,
            '-y',               # overwrite output if exists
            '-i', avi_path,     # input video
            '-c:v', 'libx264',  # video codec
            '-c:a', 'aac',      # audio codec
            '-shortest',        # finish when the shorter stream ends
            mp4_path            # output file
        ], check=True)

        # Step 2: add the audio track without re-encoding the video stream.
        subprocess.run([
            ffmpeg_exe,
            '-y',
            '-i', mp4_path,
            '-i', audio_file,
            '-c:v', 'copy',
            '-c:a', 'aac',
            '-shortest',
            output_file
        ], check=True)
    finally:
        # Never leave the intermediate files behind, even after a failure.
        for temp_path in (avi_path, mp4_path):
            _remove_if_present(temp_path)

In [6]:
import subprocess
import json
import wave
import contextlib

def get_audio_duration(filepath):
    """Return the playing time of a WAV file in seconds.

    The duration is the number of audio frames divided by the frame rate. It is
    printed (as before) and now also returned, so callers can use the value;
    previously the function returned None.

    Args:
        filepath: Path to a WAV file readable by the ``wave`` module.

    Returns:
        The duration in seconds as a float.
    """
    with wave.open(filepath, 'rb') as wav_file:
        frames = wav_file.getnframes()
        rate = wav_file.getframerate()
    duration = frames / float(rate)
    print(duration)
    return duration

In [7]:
import subprocess
import json
import wave
import contextlib
from mutagen.mp3 import MP3

def get_audio_duration(filepath):
    """Print and return the length of an MP3 file as reported by mutagen.

    Args:
        filepath: Path to the MP3 file.

    Returns:
        The ``info.length`` value of the loaded MP3 (presumably seconds --
        the unit is defined by mutagen, not by this notebook).
    """
    length = MP3(filepath).info.length
    print(length)
    return length

In [None]:
from manual_influencer.tts import synthesize_speech
import shutil

import os

import math

# Each still image is shown for this many seconds. It must match the 0.25 fps
# (one frame every four seconds) that generate_video uses by default.
SECONDS_PER_IMAGE = 4

folder = './data/visual'
audio_path = './data/audio/output.mp3'

# Make sure the working directories exist on a fresh checkout.
os.makedirs(folder, exist_ok=True)
os.makedirs(os.path.dirname(audio_path), exist_ok=True)

images = []

for idx, scene in enumerate(data['scenes']):
    print(scene)

    # Clear the frames of the previous scene/run; generate_video picks up
    # every .png in the folder, so leftovers would end up in this scene's video.
    for f in os.listdir(folder):
        path = os.path.join(folder, f)
        if os.path.isfile(path):
            os.remove(path)

    visual = generate(scene['visual'])
    synthesize_speech(text=scene['audio'], output_file=audio_path)
    duration = get_audio_duration(audio_path)

    # Round up so the slideshow is at least as long as the narration. Rounding
    # down made the video shorter than the audio, and ffmpeg's -shortest then
    # cut off the end of the narration (and produced no frame at all for clips
    # shorter than SECONDS_PER_IMAGE).
    image_count = max(1, math.ceil(duration / SECONDS_PER_IMAGE))
    for i in range(1, image_count + 1):
        # Zero-padded index keeps the files in order when sorted by name.
        generate_image(visual, os.path.join(folder, f'images_{i:03d}.png'))

    generate_video(image_folder=folder, audio_file=audio_path, output_file=str(idx) + '.mp4')

    # NOTE(review): only the first scene is rendered; remove this break to
    # process every scene.
    break

{'visual': 'PowerLine Webcast 3 Adapter in Steckdose stecken.', 'audio': 'Bevor du das Gerät in Betrieb nimmst, check die Lieferung auf Vollständigkeit, sonst gibt es am Ende noch einen Schadensfall. Dann schliesse den TechniSat PowerLine Webcast 3 mit dem Netzwerkkabel an deinen Netzwerkanschluss an, um deine Privatsphäre im Netzwerk zu schützen. Achte darauf, das Gerät nicht in direkter Nähe eines Heizkörpers aufzustellen!'}
A cartoonish, rough sketch in black and white depicting a PowerLine Webcast 3 adapter being plugged into an electrical outlet.
Audio content written to file "./data/audio/output.mp3"
22.344
Prompt: A cartoonish, rough sketch in black and white depicting a PowerLine Webcast 3 adapter being plugged into an electrical outlet.

Prompt: A cartoonish, rough sketch in black and white depicting a PowerLine Webcast 3 adapter being plugged into an electrical outlet.

Prompt: A cartoonish, rough sketch in black and white depicting a PowerLine Webcast 3 adapter being plugged