### Pipeline

1) Генерируем картинки
2) Генерируем тексты
3) Объединяем картинки и тексты с помощью replicate
4) Объединяем картинки в видео

In [1]:
from PIL import Image, ImageDraw, ImageFont
import requests
import json
import replicate
import os
import time
import hashlib

In [9]:
# Images
# Base pictures for the cards. Paths are built with os.path.join so they
# resolve on any OS instead of only on Windows (hard-coded "\\" separators).
image_filenames = [
    os.path.join("image_outputs", name)
    for name in (
        "3_horses.jpg",
        "3_horses_2.jpeg",
        "cat_in_sweater.png",
        "christmas_tree.jpg",
    )
]

In [3]:
# Texts

def generate_text(text: str,
                  font_name: str = 'morninggloryandcyrillic.otf',
                  backgroud_name: str = 'empty_background.jpg',
                  output_dir: str = 'image_outputs',
                  font_size: int = 112,
                  text_color: tuple[int, int, int] = (200, 29, 156),
                  ) -> str:
    """
    Draw ``text`` centered on a copy of the background image and save it as PNG.

    Args:
        text: Caption to draw; may contain newlines.
        font_name: Font file handed to ``ImageFont.truetype``. Per the original
            note, a file in the working directory or in the Windows fonts folder.
        backgroud_name: Path of the background image the text is drawn on.
        output_dir: Directory the PNG is written to; created if it does not exist.
        font_size: Font size passed to ``ImageFont.truetype``.
        text_color: RGB fill color of the text.

    Returns:
        Path of the saved PNG file (``output_dir`` joined with a random hex name).
    """
    # Local import: uuid is not among the file-level imports.
    import uuid

    # Close the background file handle as soon as the pixels are copied.
    with Image.open(backgroud_name) as background:
        image = background.convert('RGB')

    draw = ImageDraw.Draw(image)
    font = ImageFont.truetype(font_name, font_size)

    # textbbox reports where the ink lands when drawing at (0, 0); its left/top
    # are usually non-zero, so they must be subtracted for true centering.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    x = (image.width - (right - left)) / 2 - left
    y = (image.height - (bottom - top)) / 2 - top

    draw.text((x, y), text, fill=text_color, font=font)

    # A random name avoids collisions that timestamp-derived names can have
    # when several images are generated within one clock tick.
    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.join(output_dir, uuid.uuid4().hex + '.png')
    image.save(file_path)

    return file_path

In [4]:
# Render every caption onto the empty background
texts = [
    'С Новым годом!',
    "Пусть ваши мечты сбываются",
    "В год кота к вам\nобязательно прибудет счастье",
    "Увидимся в 2026!",
]
# Longer captions get a smaller font: 120 minus 1.2 per character, truncated to int.
text_filenames = [
    generate_text(caption, font_size=int(120 - 1.2 * len(caption)))
    for caption in texts
]


In [12]:
# Instruction sent to the image model together with the two input images.
COMBINATION_PROMPT = (
    "Add an inscription from the second image to the top or to the bottom of the first image. "
    "Do not add any extra text that is not on my images. "
    "You can move the text freely to the best location. "
    "The result should look like a card my grandmother might send me."
)


def add_text_to_image(text_image_path: str,
                      main_image_path: str,
                      output_dir: str = 'image_outputs',
                      output_size: str = '1K',
                      aspect_ratio: str = '4:3',
                      save_locally: bool = True) -> str:
    """
    Combine one main image with one text image via the Replicate model.

    Both files are uploaded to Replicate, then "bytedance/seedream-4" is run
    with COMBINATION_PROMPT. The main image is passed first and the text image
    second, matching the prompt's wording.

    Requires a Replicate API token (the original note says
    REPLICATE_API_TOKEN=... in the .env).

    Args:
        text_image_path: Path of the image that contains only the inscription.
        main_image_path: Path of the picture the inscription is added to.
        output_dir: Directory for local copies; created if it does not exist.
        output_size: Value sent as the model's "size" input.
        aspect_ratio: Value sent as the model's "aspect_ratio" input.
        save_locally: When true, every output item is also written to
            ``output_dir`` as a ``.jpg`` file with a random hex name.

    Returns:
        ``str()`` of the first output item (an image URL in the recorded run).
    """
    # Local import: uuid is not among the file-level imports.
    import uuid

    # TODO: make async

    # Upload images to replicate to be able to use them
    with open(text_image_path, "rb") as file:
        text_image = replicate.files.create(file)
    with open(main_image_path, 'rb') as file:
        generated_image = replicate.files.create(file)

    # Named model_input so the builtin input() is not shadowed.
    model_input = {
        "size": output_size,
        "prompt": COMBINATION_PROMPT,
        "aspect_ratio": aspect_ratio,
        "image_input": [generated_image.urls['get'], text_image.urls['get']]
    }

    output = replicate.run(
        "bytedance/seedream-4",
        input=model_input
    )

    # To write the files to disk:
    if save_locally:
        os.makedirs(output_dir, exist_ok=True)
        for item in output:
            # Random names cannot collide the way timestamp-derived names can
            # when two items are saved within one clock tick.
            file_path = os.path.join(output_dir, uuid.uuid4().hex + '.jpg')
            with open(file_path, "wb") as file:
                file.write(item.read())

    return str(output[0])



In [13]:
# Pair every caption image with its base picture. Check the lengths first so a
# missing picture is reported before any (billable) model call is made.
if len(image_filenames) < len(text_filenames):
    raise ValueError(
        f"Need at least {len(text_filenames)} images, got {len(image_filenames)}"
    )
images_with_text = [
    add_text_to_image(text_path, image_path)
    for text_path, image_path in zip(text_filenames, image_filenames)
]

In [None]:
# Display the strings returned by add_text_to_image (one per card).
images_with_text

In [4]:
# Combining images into a video
API_KEY = os.environ['SHOT_STACK_API_TOKEN']

url = "https://api.shotstack.io/stage/render"
headers = {
    "x-api-key": API_KEY,
    "Content-Type": "application/json"
}

# Every slide is shown for the same number of seconds; slides play back to back,
# so the start time of slide i is i * CLIP_LENGTH.
CLIP_LENGTH = 3

# Transition of each slide, in playback order.
TRANSITIONS = [
    {"out": "slideRight"},
    {"in": "wipeLeft", "out": "fade"},
    {"in": "slideUp", "out": "slideDown"},
    {"in": "fade"},
]

# One clip per (image, transition) pair instead of four copy-pasted dicts.
# NOTE: zip stops at the shorter list, so at most len(TRANSITIONS) slides are used.
clips = [
    {
        "asset": {"type": "image", "src": image_src},
        "start": position * CLIP_LENGTH,
        "length": CLIP_LENGTH,
        "transition": transition,
    }
    for position, (image_src, transition) in enumerate(zip(images_with_text, TRANSITIONS))
]

payload = {
    "timeline": {
        "tracks": [
            {"clips": clips}
        ]
    },
    "output": {"format": "mp4", "resolution": "hd", "aspectRatio": "4:3"}
}

# json= lets requests serialize the payload; the timeout keeps the cell from
# hanging forever if the API does not answer.
response = requests.post(url, headers=headers, json=payload, timeout=30)
print(response.status_code, response.text)


201 {"success":true,"message":"Created","response":{"message":"Render Successfully Queued","id":"5d84a93b-11af-430b-8a84-6f37f1fe4820"}}


In [5]:
# Poll the render status until it reaches a terminal state, instead of asking
# once and hoping the render has already finished.
RENDER_ID = response.json()['response']['id']
print(RENDER_ID)
url = "https://api.shotstack.io/stage/render"

POLL_INTERVAL_SECONDS = 5
MAX_POLLS = 60  # give up after roughly five minutes

for _ in range(MAX_POLLS):
    r = requests.get(url + f"/{RENDER_ID}", headers=headers, timeout=30)
    render_info = r.json()  # parse the body once and reuse it below
    status = render_info['response']['status']
    # 'done' is the status seen in the recorded run; 'failed' is assumed to be
    # the other terminal status - confirm against the Shotstack API reference.
    if status in ('done', 'failed'):
        break
    time.sleep(POLL_INTERVAL_SECONDS)

print(render_info)
if status == 'done':
    print("Video url:", render_info['response']['url'])
else:
    print("Render is not finished, last status:", status)

5d84a93b-11af-430b-8a84-6f37f1fe4820
{'success': True, 'message': 'OK', 'response': {'id': '5d84a93b-11af-430b-8a84-6f37f1fe4820', 'owner': '1mj5p7ly7k', 'plan': 'freeTrial', 'status': 'done', 'error': '', 'duration': 12, 'billable': 12, 'renderTime': 3751.72, 'url': 'https://shotstack-api-stage-output.s3-ap-southeast-2.amazonaws.com/1mj5p7ly7k/5d84a93b-11af-430b-8a84-6f37f1fe4820.mp4', 'poster': None, 'thumbnail': None, 'data': {'output': {'format': 'mp4', 'resolution': 'hd', 'aspectRatio': '4:3'}, 'timeline': {'tracks': [{'clips': [{'start': 0, 'length': 3, 'asset': {'type': 'image', 'src': 'https://replicate.delivery/xezq/IobweNx5pWWzRyQSPhujOr5BzSg2Z6ngOcRH9xKKmmRP1VuKA/tmpnvizjhgl.jpg'}, 'transition': {'out': 'slideRight'}}, {'start': 3, 'length': 3, 'asset': {'type': 'image', 'src': 'https://replicate.delivery/xezq/UXYeYnPZfFjYP0erhXgZRTtBbzeHIf4OnECi4foVn8Hau6KXF/tmpkyhsyefb.jpg'}, 'transition': {'in': 'wipeLeft', 'out': 'fade'}}, {'start': 6, 'length': 3, 'asset': {'type': 'ima