In [None]:
! pip3 install --upgrade --user google-cloud-aiplatform Pillow matplotlib

In [None]:
# Restart the runtime
# Run this once after the pip install cell so the kernel starts over and can
# import the newly installed package versions. Cells after this one must be
# run again once the kernel is back.
import IPython

# Handle to the running IPython application; its kernel is shut down below.
app = IPython.Application.instance()
# True is passed as the restart flag (see "Restart the runtime" above).
app.kernel.do_shutdown(True)

In [None]:
# Google Cloud project and region handed to vertexai.init(); edit both values
# before running the notebook against your own project.
PROJECT_ID = "alexbu-gke-dev-d"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

import vertexai

# Configure the Vertex AI SDK with the project and location chosen above.
vertexai.init(project=PROJECT_ID, location=LOCATION)

In [None]:
import base64
import vertexai
from vertexai.preview.vision_models import ImageGenerationModel
from vertexai.generative_models import GenerativeModel, Part, SafetySetting


import IPython.display
from PIL import Image as PIL_Image
from PIL import ImageOps as PIL_ImageOps
import matplotlib.pyplot as plt


# LLM configuration: sampling parameters passed as generation_config to
# chat.send_message() in generate_prompts_for_words().
generation_config = dict(
    max_output_tokens=8192,
    temperature=1,
    top_p=0.95,
)

# One SafetySetting per harm category listed below, each with its blocking
# threshold set to OFF. The list is passed as safety_settings to
# chat.send_message() in generate_prompts_for_words().
safety_settings = [
    SafetySetting(
        category=harm_category,
        threshold=SafetySetting.HarmBlockThreshold.OFF,
    )
    for harm_category in (
        SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
    )
]

def init_llm():
    """Configure the Vertex AI SDK for this notebook.

    Reads the module-level ``PROJECT_ID`` and ``LOCATION`` values and forwards
    them to ``vertexai.init``. Returns nothing.
    """
    sdk_settings = {"project": PROJECT_ID, "location": LOCATION}
    vertexai.init(**sdk_settings)

def generate_prompts_for_words(words_and_translations, model_name="gemini-1.5-flash-002"):
    """Ask a Gemini chat model for one image-generation prompt per word.

    Args:
        words_and_translations: Iterable of ``(word_id, word, pinyin,
            translation)`` tuples. ``word_id`` is unpacked but not used.
        model_name: Name passed to ``GenerativeModel``. Defaults to the model
            this notebook previously hard-coded.

    Returns:
        A list of ``(word, prompt_text)`` tuples in input order. An empty
        input yields ``[]`` without creating a model or chat session.

    Raises:
        IndexError: If a response has no candidates or no content parts (the
            text is read from ``candidates[0].content.parts[0]``).
    """
    entries = list(words_and_translations)
    if not entries:
        # Nothing to ask for, so skip model and chat creation entirely.
        return []

    model = GenerativeModel(model_name)
    # NOTE(review): one chat session is reused for every word, so presumably
    # earlier exchanges are carried along as history on each request. Confirm
    # this is intended rather than one independent request per word.
    chat = model.start_chat()

    prompts = []
    for _word_id, word, pinyin, translation in entries:
        response = chat.send_message(
            [f"""
            Generate a prompt that I can pass to an image generation model, like "imagen" to generate an image hint for Chinese word "{word}" pinyin "{pinyin}" translated as "{translation}" in a language learning app. The hint should be subtle without revealing the meaning. The picture should not have any text.
            """],
            generation_config=generation_config,
            safety_settings=safety_settings
        )
        # Extract the generated text part from the response
        prompt_text = response.candidates[0].content.parts[0].text
        prompts.append((word, prompt_text))
    return prompts

# Function to generate images from prompts
def generate_images_from_prompts(prompts, model_name="imagen-3.0-fast-generate-001"):
    """Request one image per prompt from an Imagen model.

    Args:
        prompts: Iterable of ``(word, prompt)`` pairs, as returned by
            ``generate_prompts_for_words``.
        model_name: Name passed to ``ImageGenerationModel.from_pretrained``.
            Defaults to the fast model, which is the only one this function
            ever called.

    Returns:
        A list with one entry per prompt, each being the object returned by
        ``generate_images`` for that prompt, in input order. An empty input
        yields ``[]`` without loading a model.
    """
    pairs = list(prompts)
    if not pairs:
        # Nothing to generate, so skip loading the model.
        return []

    # Only one model is loaded: the previous code also loaded
    # "imagen-3.0-generate-001" but never used it.
    image_model = ImageGenerationModel.from_pretrained(model_name)

    images = []
    for word, prompt in pairs:
        print(f"Generating image for word {word} with prompt: {prompt}")
        response = image_model.generate_images(
            prompt=prompt,
            number_of_images=1,
            aspect_ratio="3:4",
            safety_filter_level="block_some",
            person_generation="allow_adult",
        )
        images.append(response)

    return images

# Function to display images
def display_images(images, words=None):
    """Plot the first image of every generation result in a single column.

    Args:
        images: Iterable of image-generation results, one per word. Only the
            first image of each result is drawn.
        words: Optional sequence of title labels, one per entry in ``images``.
            When omitted, labels are taken from field 1 of each tuple in the
            notebook-level ``words_and_translations`` list, which is where
            this function always read them from before the parameter existed.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    results = list(images)
    if not results:
        # plt.subplots() rejects a zero-row grid, so bail out before plotting.
        print("No images to display.")
        return

    if words is None:
        # Backward-compatible default: read labels from the notebook global.
        words = [entry[1] for entry in words_and_translations]

    fig, axes = plt.subplots(len(results), 1, figsize=(6, 4 * len(results)))
    if len(results) == 1:
        # With a single row, subplots() returns one Axes rather than a sequence.
        axes = [axes]

    for index, (ax, result) in enumerate(zip(axes, results)):
        # Fall back to a positional label if fewer labels than images exist.
        label = words[index] if index < len(words) else f"#{index + 1}"
        # NOTE(review): a result may contain no images (for example if the
        # request was filtered), so take the first one defensively instead of
        # indexing [0].
        first_image = next(iter(result), None)
        if first_image is None:
            ax.set_title(f"No image returned for word {label}")
        else:
            ax.imshow(first_image._pil_image)
            ax.set_title(f"Image for word {label}")
        ax.axis("off")

    plt.show()

# Function to display individual image
def display_image(image, max_width: int = 600, max_height: int = 350) -> None:
    """Show one generated image inline, shrunk to fit a bounding box.

    Args:
        image: Object exposing the picture as a PIL image via ``_pil_image``.
        max_width: Maximum displayed width in pixels.
        max_height: Maximum displayed height in pixels.

    Returns:
        None. The image is rendered with ``IPython.display.display``.
    """
    # A local annotation replaces the former typing.cast(...) call: `typing` is
    # not imported in the visible cells, so that call raised NameError. Local
    # variable annotations are not evaluated at runtime.
    pil_image: PIL_Image.Image = image._pil_image
    if pil_image.mode != "RGB":
        pil_image = pil_image.convert("RGB")
    image_width, image_height = pil_image.size
    # Only resize when the image exceeds the box in at least one dimension.
    if max_width < image_width or max_height < image_height:
        pil_image = PIL_ImageOps.contain(pil_image, (max_width, max_height))
    IPython.display.display(pil_image)

init_llm()  # Run vertexai.init with PROJECT_ID and LOCATION before any model is used




In [None]:
# Sample words and translations
# Each entry is (word_id, word, pinyin, translation). Comment entries in or out
# to choose which words get an image; display_images() reads this list by
# position to title each plot.
words_and_translations = [
    #(14, "饱", "bǎo", "full"),
    #(15, "北方", "běifāng", "north"),
    #(16, "被", "bèi", "cover"),
    #(17, "鼻子", "bízi", "nose"),
    #(18, "比较", "bǐjiào", "compare"),
    #(19, "比赛", "bǐsài", "match"),
    #(20, "笔记本", "bǐjìběn", "notebook"),
    (21, "必须", "bìxū", "must"),
    (22, "变化", "biànhuà", "change"),
    #(23, "别人", "biéren", "others"),
    #(24, "冰箱", "bīngxiāng", "refrigerator"),
    #(25, "菜单", "càidān", "menu"),
    #(26, "参加", "cānjiā", "participate in")
]

# Run the pipeline end to end: one text prompt per word, one image request per
# prompt, then plot the results.
prompts = generate_prompts_for_words(words_and_translations)  # Generate prompts for each word
images = generate_images_from_prompts(prompts)  # Generate images from prompts
display_images(images)  # Display the generated images