# Basic Setup

In [77]:
pip install -q bentoml

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ipython 8.27.0 requires prompt-toolkit<3.1.0,>=3.0.41, but you have prompt-toolkit 3.0.36 which is incompatible.[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [107]:
import json
import os
import shutil
from pathlib import Path
from typing import List
from uuid import uuid4

import bentoml
import requests
from dotenv import load_dotenv
from IPython.display import Image
from jinja2 import Template
from litellm import completion
from pydantic import BaseModel, Field
from pydantic.json import pydantic_encoder

In [113]:
# Presumably loads settings from a local .env file into the process
# environment; FLUX_SERVER_URL is read via os.getenv further down.
# NOTE(review): confirm which other keys (e.g. LLM API credentials) are expected.
load_dotenv()

True

# Download the document

In [30]:
# Download the raw text and strip out the Project Gutenberg front matter
# that precedes the first chapter heading.
DOC_URL = "https://www.gutenberg.org/cache/epub/23962/pg23962.txt"
FIRST_CHAPTER_MARKER = "第一回"

# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being treated as the book.
doc_response = requests.get(DOC_URL, timeout=30)
doc_response.raise_for_status()
doc = doc_response.text

# str.find() returns -1 when the marker is absent, which would silently slice
# the document down to its final character, so fail loudly instead.
strip_index = doc.find(FIRST_CHAPTER_MARKER)
if strip_index == -1:
    raise ValueError(
        f"Chapter marker {FIRST_CHAPTER_MARKER!r} not found in downloaded text"
    )
doc = doc[strip_index:].strip()
doc[:100]

'第一回     靈根育孕源流出\u3000心性修持大道生\r\n\r\n\r\n\u3000\u3000詩曰：\r\n\u3000\u3000\u3000\u3000混沌未分天地亂，茫茫渺渺無人見。\r\n\u3000\u3000\u3000\u3000自從盤古破鴻濛，開闢從茲清濁辨。\r\n\u3000\u3000\u3000\u3000覆載群生仰至仁，發明萬物皆成善。'

In [32]:
# Paragraphs are delimited by a blank line (two consecutive CRLF pairs).
# Any CRLF that survives inside a paragraph is removed so each paragraph
# becomes a single unbroken string.
raw_paragraphs = doc.split("\r\n\r\n")
paragraphs = ["".join(chunk.split("\r\n")) for chunk in raw_paragraphs]
paragraphs[:5]

['第一回     靈根育孕源流出\u3000心性修持大道生',
 '\u3000\u3000詩曰：\u3000\u3000\u3000\u3000混沌未分天地亂，茫茫渺渺無人見。\u3000\u3000\u3000\u3000自從盤古破鴻濛，開闢從茲清濁辨。\u3000\u3000\u3000\u3000覆載群生仰至仁，發明萬物皆成善。\u3000\u3000\u3000\u3000欲知造化會元功，須看西遊釋厄傳。',
 '蓋聞天地之數，有十二萬九千六百歲為一元。將一元分為十二會，乃子、丑、寅、卯、辰、巳、午、未、申、酉、戌、亥之十二支也。每會該一萬八百歲。且就一日而論：子時得陽氣，而丑則雞鳴﹔寅不通光，而卯則日出﹔辰時食後，而巳則挨排﹔日午天中，而未則西蹉﹔申時晡，而日落酉，戌黃昏，而人定亥。譬於大數，若到戌會之終，則天地昏曚而萬物否矣。再去五千四百歲，交亥會之初，則當黑暗，而兩間人物俱無矣，故曰混沌。又五千四百歲，亥會將終，貞下起元，近子之會，而復逐漸開明。邵康節曰：：「冬至子之半，天心無改移。一陽初動處，萬物未生時。」到此，天始有根。再五千四百歲，正當子會，輕清上騰，有日，有月，有星，有辰。日、月、星、辰，謂之四象。故曰，天開於子。又經五千四百歲，子會將終，近丑之會，而逐漸堅實。《易》曰：「大哉乾元！至哉坤元！萬物資生，乃順承天。」至此，地始凝結。再五千四百歲，正當丑會，重濁下凝，有水，有火，有山，有石，有土。水、火、山、石、土，謂之五形。故曰，地闢於丑。又經五千四百歲，丑會終而寅會之初，發生萬物。曆曰：「天氣下降，地氣上升﹔天地交合，群物皆生。」至此，天清地爽，陰陽交合。再五千四百歲，子會將終，近丑之會，而逐漸堅實。《易》曰：「大哉乾元！至哉坤元！萬物資生，乃順承天。」至此，地始凝結。再五千四百歲，正當丑會，重濁下凝，有水，有火，有山，有石，有土。水、火、山、石、土，謂之五形。故曰，地闢於丑。又經五千四百歲，丑會終而寅會之初，發生萬物。曆曰：「天氣下降，地氣上升﹔天地交合，群物皆生。」至此，天清地爽，陰陽交合。再五千四百歲，正當寅會，生人，生獸，生禽，正謂天地人，三才定位。故曰，人生於寅。',
 '感盤古開闢，三皇治世，五帝定倫，世界之間，遂分為四大部洲：曰東勝神洲，曰西牛賀洲，曰南贍部洲，曰北俱蘆洲。這部書單表東勝神洲。海外有一國土，名曰傲來國。國近大海

# Set up our datatypes

In [64]:
class StoryScene(BaseModel):
    """A brief description of a scene in a story."""

    # Field descriptions are passed through Field(description=...) rather than
    # written as bare strings under each attribute: pydantic only copies such
    # attribute docstrings into the JSON schema when `use_attribute_docstrings`
    # is enabled, so otherwise the schema handed to the LLM carries no
    # per-field guidance.
    description: str = Field(
        default="",
        description="A two or three line description of a scene in a story.",
    )
    characters: List[str] = Field(
        default_factory=list,
        description="The names of characters in the scene, if any.",
    )

class StoryScenes(BaseModel):
    """A series of story scenes."""

    # The description goes through Field(...) so that it appears in the JSON
    # schema; a bare string under the attribute is not picked up by pydantic
    # unless `use_attribute_docstrings` is enabled. The field stays required.
    scenes: List[StoryScene] = Field(
        description="One or more extracted story scenes.",
    )

class ScenePrompt(BaseModel):
    """An image prompt for a story scene."""

    # Descriptions are supplied via Field(...) so they reach the JSON schema;
    # bare strings under an attribute are ignored by pydantic unless
    # `use_attribute_docstrings` is enabled. Both fields stay required.
    prompt: str = Field(
        description="A prompt to an image generation model for a story scene.",
    )
    theme: str = Field(
        description="A description of the selected theme for the prompt.",
    )

class ScenePrompts(BaseModel):
    """A list of Scene Prompts."""

    # The description is given through Field(...) so it is emitted in the JSON
    # schema; a bare string under the attribute would not be unless
    # `use_attribute_docstrings` is enabled. The field stays required.
    prompts: List[ScenePrompt] = Field(
        description="One or more Scene Prompts.",
    )

# Extract some scenes from the first few paragraphs

In [74]:
# Expose StoryScenes as a single function-style tool: the model's JSON schema
# is used as the tool parameters and the schema's own "description" entry
# doubles as the tool description.
schema = StoryScenes.model_json_schema()
tools = [
    dict(
        type="function",
        function=dict(
            name=StoryScenes.__name__,
            description=schema["description"],
            parameters=schema,
        ),
    ),
]

In [75]:
# Jinja2 template for the scene-extraction request. It expects one variable,
# `paragraphs`, and renders each entry on its own line inside <paragraphs>.
prompt_template = Template(
    "\n"
    "Extract the major scenes from this series of paragraphs from a story:\n"
    "\n"
    "<paragraphs>\n"
    "{% for p in paragraphs %}\n"
    "{{ p }}\n"
    "{% endfor %}\n"
    "</paragraphs>\n"
)

In [54]:
# Ask the model to extract scenes from the first five paragraphs.
messages = [
    {
        "role": "user",
        "content": prompt_template.render(
            paragraphs=paragraphs[:5],
        ),
    }
]
response = completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=messages,
    tools=tools,
    # A bare function-name string is not one of the documented tool_choice
    # values ("auto", "none", "required", or a dict naming the function), so
    # it may not actually force the tool. Use the explicit dict form.
    tool_choice={
        "type": "function",
        "function": {"name": StoryScenes.__name__},
    },
)

In [63]:
# Collect every StoryScene returned across the response's tool calls.
scenes = []
# tool_calls is None when the model answers without calling a tool; treat that
# the same as an empty list instead of failing with a TypeError.
for tool_call in response.choices[0].message.tool_calls or []:
    # The tool arguments arrive as a JSON string matching the StoryScenes schema.
    extracted_scenes = StoryScenes(**json.loads(tool_call.function.arguments))
    scenes.extend(extracted_scenes.scenes)
print(scenes)

[StoryScene(description='The creation of the universe and the concept of time cycles', characters=['盤古 (Pangu)']), StoryScene(description='The formation of the world through different time periods (子, 丑, 寅, etc.)', characters=[]), StoryScene(description='The division of the world into four great continents', characters=['三皇 (Three Sovereigns)', '五帝 (Five Emperors)']), StoryScene(description='Introduction of the Aolai country and the Flower Fruit Mountain', characters=[]), StoryScene(description='Detailed description of the Flower Fruit Mountain and its magical features', characters=[]), StoryScene(description='The peculiar stone atop the Flower Fruit Mountain', characters=[])]


# Extract Image Prompts for the first scene

In [65]:
# Expose ScenePrompts as a single function-style tool: the model's JSON schema
# is used as the tool parameters and the schema's own "description" entry
# doubles as the tool description.
scene_prompt_schema = ScenePrompts.model_json_schema()
scene_prompt_tools = [
    dict(
        type="function",
        function=dict(
            name=ScenePrompts.__name__,
            description=scene_prompt_schema["description"],
            parameters=scene_prompt_schema,
        ),
    ),
]

In [67]:
# Jinja2 template for the image-prompt request. It expects `scene` (the scene
# to illustrate) and `paragraphs` (the source text, one entry per line).
# The two spaces before the first line break are part of the original text.
scene_prompt_template = Template(
    "\n"
    "Given the specified scene related to the paragraphs below, extract multiple prompts to an image generation model.  \n"
    "For each prompt, pick a creative artistic theme to follow.\n"
    "\n"
    "<scene>\n"
    "{{ scene }}\n"
    "</scene>\n"
    "\n"
    "<paragraphs>\n"
    "{% for p in paragraphs %}\n"
    "{{ p }}\n"
    "{% endfor %}\n"
    "</paragraphs>\n"
)

In [72]:
# Ask the model for image prompts covering the first extracted scene, giving
# it the same five paragraphs as context.
response = completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[
        {
            "role": "user",
            "content": scene_prompt_template.render(
                paragraphs=paragraphs[:5],
                scene=scenes[0],
            ),
        }
    ],
    tools=scene_prompt_tools,
    # A bare function-name string is not one of the documented tool_choice
    # values ("auto", "none", "required", or a dict naming the function), so
    # it may not actually force the tool. Use the explicit dict form.
    tool_choice={
        "type": "function",
        "function": {"name": ScenePrompts.__name__},
    },
)

In [80]:
# Collect every ScenePrompt returned across the response's tool calls.
extracted_prompts = []
# tool_calls is None when the model answers without calling a tool; treat that
# the same as an empty list instead of failing with a TypeError.
for tool_call in response.choices[0].message.tool_calls or []:
    # The tool arguments arrive as a JSON string matching the ScenePrompts schema.
    ep = ScenePrompts(**json.loads(tool_call.function.arguments))
    extracted_prompts.extend(ep.prompts)
extracted_prompts[0]

ScenePrompt(prompt='Pangu, a primordial giant, emerging from a cosmic egg, splitting the chaotic darkness into sky and earth, with swirling energies of yin and yang surrounding him', theme='Ancient Chinese mythology meets abstract expressionism')

# Generate the images

In [None]:
# Pick the destination first and make sure its directory exists: shutil.move
# fails with FileNotFoundError if `data/` has not been created yet.
image_path = Path(f"data/result-{uuid4()}.png")
image_path.parent.mkdir(parents=True, exist_ok=True)

# The image server URL comes from the FLUX_SERVER_URL environment variable.
with bentoml.SyncHTTPClient(os.getenv("FLUX_SERVER_URL")) as client:
    # Note: this renders the third extracted prompt (index 2), not the first.
    result = client.txt2img(
        prompt=extracted_prompts[2].prompt,
        num_inference_steps=20,
    )
    # NOTE(review): `result` is presumably a path to a temporary file produced
    # by the client; it is moved to a uniquely named PNG under data/.
    shutil.move(result, image_path)
Image(
    filename=str(image_path),
)

# Backup all the extracted objects

In [112]:
# Persist the extracted scenes and prompts as JSON under data/.
backup_dir = Path("data")
# Create the directory up front so a fresh checkout does not fail on open().
backup_dir.mkdir(parents=True, exist_ok=True)

# model_dump() converts each pydantic model into a plain dict, which avoids
# the deprecated pydantic.json.pydantic_encoder hook. The encoding is stated
# explicitly so the files do not depend on the platform default.
with open(backup_dir / "extracted_scenes.json", "w", encoding="utf-8") as f:
    json.dump([scene.model_dump() for scene in scenes], f, indent=4)
with open(backup_dir / "extracted_prompts.json", "w", encoding="utf-8") as f:
    json.dump([prompt.model_dump() for prompt in extracted_prompts], f, indent=4)