In [1]:
import os
import json
import torch
import re
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import get_peft_model, LoraConfig, TaskType
from datasets import Dataset

In [2]:

# Load keys and set token
with open("../keys.json", "r") as file:
    token_data = json.load(file)
HUGGINGFACE_TOKEN = token_data["huggingface_access_token"]
os.environ["HF_TOKEN"] = HUGGINGFACE_TOKEN 

# Define model checkpoint and directories
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
scratch_dir = "../models"

# Create a BitsAndBytesConfig for 4-bit quantization.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

In [3]:
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=scratch_dir)

# Load the model in 4-bit mode using quantization_config
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir=scratch_dir,
    device_map="auto",
    quantization_config=quantization_config
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:

# Define the LoRA configuration; parameters can be tuned as needed.
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # adjust based on model architecture
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)
# Wrap the 4-bit quantized model with LoRA adapter using QLoRA.
model = get_peft_model(model, lora_config)

In [6]:
def extract_textbook_text_with_image_explainations(entry):
    texts = []
    lesson_name = entry.get("lessonName", "")
    if lesson_name:
        texts.append(f"Lesson: {lesson_name.strip()}")
    
    base_dir = "../data/tqa_train_val_test/train/"
    
    def extract_content_and_explanations(section):
        section_texts = []
        content = section.get("content", {})
        text = content.get("text", "")
        if text:
            section_texts.append(text.strip())
        figures = content.get("figures", [])
        for fig in figures:
            caption = fig.get("caption", "").strip()
            image_path = fig.get("imagePath", "")
            if image_path:
                if "textbook_images" in image_path:
                    explanation_folder = "textbook_images_llava_captions"
                elif "teaching_images" in image_path:
                    explanation_folder = "teaching_images_llava_captions"
                else:
                    explanation_folder = None
                if explanation_folder:
                    file_name = os.path.basename(image_path)
                    base_name = os.path.splitext(file_name)[0]
                    explanation_file = os.path.join(base_dir, explanation_folder, base_name + ".txt")
                    if os.path.exists(explanation_file):
                        with open(explanation_file, 'r', encoding="utf-8") as f:
                            explanation = f.read().strip()
                        if explanation:
                            combined_text = ""
                            if caption:
                                combined_text += f"Image Caption: {caption}. "
                            combined_text += f"Image Explanation: {explanation}"
                            section_texts.append(combined_text)
        return section_texts

    adjunct_topics = entry.get("adjunctTopics", {})
    for topic in adjunct_topics.values():
        texts.extend(extract_content_and_explanations(topic))
    
    topics = entry.get("topics", {})
    for topic in topics.values():
        texts.extend(extract_content_and_explanations(topic))
    
    return texts

def sanitize_filename(name):
    name = name.strip().replace(" ", "_")
    return re.sub(r'[^A-Za-z0-9_-]', '', name)

def create_training_example(entry, ground_truth_folder):
    identifier = entry.get("globalID", "")
    if not identifier or not identifier.strip():
        lesson_name = entry.get("lessonName", "")
        identifier = sanitize_filename(lesson_name) if lesson_name.strip() else f"entry_unknown"
    
    extracted_texts = extract_textbook_text_with_image_explainations(entry)
    combined_text = "\n\n".join(extracted_texts)
    if not combined_text.strip():
        return None
    
    gt_file = os.path.join(ground_truth_folder, f"mermaid_code_{identifier}.txt")
    if not os.path.exists(gt_file):
        print(f"Ground truth file not found for identifier {identifier} at {gt_file}")
        return None
    
    with open(gt_file, "r", encoding="utf-8") as f:
        ground_truth_mindmap = f.read().strip()
    
    training_text = (
        "Instruction: Generate a mind map in Mermaid syntax for the following textbook text. "
        "The mind map should capture the main topics and their subtopics clearly in a hierarchical structure. "
        "Ensure that there is exactly one central (root) node, which is the only root, and attach all other topics "
        "as subtopics of this central node. Do NOT include a 'root' node with a generic label; instead, use a relevant central topic. "
        "Also, do NOT include any theme directives such as %%{init: {\"theme\": \"default\"}}%%, any parentheses or any extraneous formatting. "
        "The mind map should be self-explanatory so that by reading it, a user can understand the key content and structure "
        "of the text document. Keep the Mermaid syntax minimal so it renders correctly, and be concise.\n\n"
        f"Input: {combined_text}\n"
        f"Output: {ground_truth_mindmap}"
    )
    return {"text": training_text}

In [6]:

# ----------------------------
# Load and prepare training data
# ----------------------------
train_data_path = "../data/tqa_train_val_test/train/tqa_v1_train.json"
with open(train_data_path, "r", encoding="utf-8") as f:
    train_entries = json.load(f)

dataset_folder = os.path.dirname(train_data_path)
ground_truth_folder = os.path.join(dataset_folder, "mindmap_text")

train_examples = []
for i, entry in tqdm(enumerate(train_entries), total=len(train_entries), desc="Preparing training examples"):
    try:
        example = create_training_example(entry, ground_truth_folder)
        if example is not None:
            train_examples.append(example)
    except Exception as e:
        print(f"Error processing training entry {i}: {e}")
        continue

if not train_examples:
    raise ValueError("No training examples were created. Check your ground truth folder and JSON data.")

train_dataset = Dataset.from_list(train_examples)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

Preparing training examples: 100%|██████████| 666/666 [00:00<00:00, 5734.18it/s]


In [7]:

##########################################
# Tokenize and mask prompt tokens
##########################################
def tokenize_function(examples):
    input_ids_list = []
    attention_mask_list = []
    labels_list = []
    max_length = 5000

    for full_text in examples["text"]:
        split_text = full_text.split("Output:")
        if len(split_text) == 2:
            prompt_part = split_text[0] + "Output:"  # Include the marker
            tokenized_full = tokenizer(full_text, truncation=True, padding="max_length", max_length=max_length)
            # Tokenize the prompt WITHOUT truncation to get the true prompt length.
            tokenized_prompt = tokenizer(prompt_part, add_special_tokens=False, truncation=False)
            prompt_length = len(tokenized_prompt["input_ids"])
            labels = tokenized_full["input_ids"].copy()
            if prompt_length < max_length:
                labels[:prompt_length] = [-100] * prompt_length
            else:
                # If prompt is too long, don't mask (or consider skipping this example)
                pass
        else:
            tokenized_full = tokenizer(full_text, truncation=True, padding="max_length", max_length=max_length)
            labels = tokenized_full["input_ids"].copy()
        
        def pad_and_truncate(seq, pad_value, length):
            if len(seq) < length:
                return seq + [pad_value] * (length - len(seq))
            return seq[:length]
        
        input_ids = pad_and_truncate(tokenized_full["input_ids"], tokenizer.pad_token_id, max_length)
        attention_mask = pad_and_truncate(tokenized_full["attention_mask"], 0, max_length)
        labels = pad_and_truncate(labels, -100, max_length)
        
        input_ids_list.append(input_ids)
        attention_mask_list.append(attention_mask)
        labels_list.append(labels)
        
    return {
        "input_ids": input_ids_list,
        "attention_mask": attention_mask_list,
        "labels": labels_list,
    }

tokenized_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])


Map:   0%|          | 0/666 [00:00<?, ? examples/s]

In [8]:
##########################################
# Training Setup
##########################################
training_args = TrainingArguments(
    output_dir="./lora_mistral_mindmaps",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=1e-4,
    fp16=True,
    logging_steps=10,
    save_steps=100,
    evaluation_strategy="no"
)



In [13]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

##########################################
# Train and Save the Model
##########################################
trainer.train()
model.save_pretrained("./lora_mistral_adapter_mindmaps")

Step,Training Loss
10,21.6702
20,5.4536
30,5.0922
40,4.5038


## Testing just Mistral7b

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import os
import json
from tqdm import tqdm

# Define model name and cache directory
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
scratch_dir = "../models"

# Create the quantization configuration (same as used during training)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)


In [2]:

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=scratch_dir)

# Load the base model with 4-bit quantization (without LoRA adapter)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir=scratch_dir,
    device_map="auto",
    quantization_config=quantization_config
)

# Load the test dataset JSON file.
test_data_path = "../data/tqa_train_val_test/test/tqa_v2_test.json"
with open(test_data_path, "r", encoding="utf-8") as f:
    test_entries = json.load(f)

# Define the dataset folder and ground truth folder if needed
dataset_folder = os.path.dirname(test_data_path)
ground_truth_folder = os.path.join(dataset_folder, "mindmap_text")


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
# Create the test prompt from an entry
def create_test_prompt(entry):
    extracted_texts = extract_textbook_text_with_image_explainations(entry)
    combined_text = "\n\n".join(extracted_texts)
    prompt = (
         "Instruction: Generate a mind map in Mermaid syntax for the following textbook text. "
         "The mind map should capture the main topics and their subtopics clearly in a hierarchical structure. "
         "Ensure that there is exactly one central (root) node, which is the only root, and attach all other topics "
         "as subtopics of this central node. Do NOT include a 'root' node with a generic label; instead, use a relevant central topic. "
         "Also, do NOT include any theme directives such as %%{init: {\"theme\": \"default\"}}%%, any parentheses or any extraneous formatting. "
         "The mind map should be self-explanatory so that by reading it, a user can understand the key content and structure "
         "of the text document. Keep the Mermaid syntax minimal so it renders correctly, and be concise.\n\n"
         f"Input: {combined_text}\n"
         "Output:"
    )
    return prompt

# Set up the text-generation pipeline using the base model
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)



Device set to use cuda:0


In [7]:
# Sample Inference Code

# Choose a sample test entry (e.g., the first entry in your test dataset)
sample_entry = test_entries[0]

# Build the prompt using your test prompt function
sample_prompt = create_test_prompt(sample_entry)
print("=== Sample Prompt ===")
print(sample_prompt)
print("=====================")

# Generate the mind map using the generation pipeline
sample_output = generator(sample_prompt, max_new_tokens=300, do_sample=False)
generated_text = sample_output[0]["generated_text"]

# Extract the generated output portion after "Output:" marker (if present)
if "Output:" in generated_text:
    mindmap = generated_text.split("Output:")[1].strip()
else:
    mindmap = generated_text.strip()

print("=== Generated Mind Map ===")
print(mindmap)
print("==========================")


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


=== Sample Prompt ===
Instruction: Generate a mind map in Mermaid syntax for the following textbook text. The mind map should capture the main topics and their subtopics clearly in a hierarchical structure. Ensure that there is exactly one central (root) node, which is the only root, and attach all other topics as subtopics of this central node. Do NOT include a 'root' node with a generic label; instead, use a relevant central topic. Also, do NOT include any theme directives such as %%{init: {"theme": "default"}}%%, any parentheses or any extraneous formatting. The mind map should be self-explanatory so that by reading it, a user can understand the key content and structure of the text document. Keep the Mermaid syntax minimal so it renders correctly, and be concise.

Input: Lesson: the nature of science

3. Write five questions that would get a friend interested in exploring the natural world. 4. A scientist was studying the effects of oil contamination on ocean seaweed. He believed t

## Testing with QLORA Adapter

In [13]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Define your model name and cache directory (if applicable)
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
scratch_dir = "../models"

# Create the quantization configuration (same as used during training)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=scratch_dir)

# Load the base model with 4-bit quantization
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir=scratch_dir,
    device_map="auto",
    quantization_config=quantization_config
)

# Load the LoRA adapter into the base model from the saved directory.
# This wraps your base_model with the LoRA parameters.
model = PeftModel.from_pretrained(base_model, "./lora_mistral_adapter_mindmaps_old")


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [15]:
import os
import json
from tqdm import tqdm
from transformers import pipeline

# Load the test dataset JSON file.
test_data_path = "../data/tqa_train_val_test/test/tqa_v2_test.json"
with open(test_data_path, "r", encoding="utf-8") as f:
    test_entries = json.load(f)

# Assume the test dataset's directory (you can also have a separate ground truth folder for test if available)
dataset_folder = os.path.dirname(test_data_path)
# Optionally, if you have ground truth mindmaps for test, define:
ground_truth_folder = os.path.join(dataset_folder, "mindmap_text")
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

def create_test_prompt(entry):
    """
    Build the test prompt from an entry.
    This uses the same extraction function and fixed instruction as in training,
    but without including a ground truth output.
    """
    extracted_texts = extract_textbook_text_with_image_explainations(entry)
    combined_text = "\n\n".join(extracted_texts)
    prompt = (
         "Instruction: Generate a mind map in Mermaid syntax for the following textbook text. "
         "The mind map should capture the main topics and their subtopics clearly in a hierarchical structure. "
         "Ensure that there is exactly one central (root) node, which is the only root, and attach all other topics "
         "as subtopics of this central node. Do NOT include a 'root' node with a generic label; instead, use a relevant central topic. "
         "Also, do NOT include any theme directives such as %%{init: {\"theme\": \"default\"}}%%, any parentheses or any extraneous formatting. "
         "The mind map should be self-explanatory so that by reading it, a user can understand the key content and structure "
         "of the text document. Keep the Mermaid syntax minimal so it renders correctly, and be concise.\n\n"
         f"Input: {combined_text}\n"
         "Output:"
    )
    return prompt




Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianFor

In [16]:
# Sample Inference Code

# Choose a sample test entry (e.g., the first entry in your test dataset)
sample_entry = test_entries[0]  # assuming test_entries is already loaded

# Build the prompt using your test prompt function
sample_prompt = create_test_prompt(sample_entry)
print("=== Sample Prompt ===")
print(sample_prompt)
print("=====================")

# Generate the mind map using the generation pipeline
sample_output = generator(sample_prompt, max_new_tokens=300, do_sample=False)
generated_text = sample_output[0]["generated_text"]

# Extract the generated output portion after "Output:" marker (if present)
if "Output:" in generated_text:
    mindmap = generated_text.split("Output:")[1].strip()
else:
    mindmap = generated_text.strip()

print("=== Generated Mind Map ===")
print(mindmap)
print("==========================")


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


=== Sample Prompt ===
Instruction: Generate a mind map in Mermaid syntax for the following textbook text. The mind map should capture the main topics and their subtopics clearly in a hierarchical structure. Ensure that there is exactly one central (root) node, which is the only root, and attach all other topics as subtopics of this central node. Do NOT include a 'root' node with a generic label; instead, use a relevant central topic. Also, do NOT include any theme directives such as %%{init: {"theme": "default"}}%%, any parentheses or any extraneous formatting. The mind map should be self-explanatory so that by reading it, a user can understand the key content and structure of the text document. Keep the Mermaid syntax minimal so it renders correctly, and be concise.

Input: Lesson: the nature of science

3. Write five questions that would get a friend interested in exploring the natural world. 4. A scientist was studying the effects of oil contamination on ocean seaweed. He believed t

In [None]:
import os
from tqdm import tqdm

# Create an output folder for individual prediction files
predictions_folder = os.path.join(dataset_folder, "test_predictions")
os.makedirs(predictions_folder, exist_ok=True)

for i, entry in tqdm(enumerate(test_entries), total=len(test_entries), desc="Generating predictions"):
    prompt = create_test_prompt(entry)
    outputs = generator(prompt, max_new_tokens=300, do_sample=False)
    generated_text = outputs[0]["generated_text"]
    
    # Extract only the generated output (after "Output:" marker)
    if "Output:" in generated_text:
        prediction = generated_text.split("Output:")[1].strip()
    else:
        prediction = generated_text.strip()
    
    # Use the entry's identifier to create a unique filename
    identifier = entry.get("globalID", f"entry_{i}")
    if not identifier or not identifier.strip():
        lesson_name = entry.get("lessonName", "")
        identifier = lesson_name.strip().replace(" ", "_") if lesson_name.strip() else f"entry_{i}"
    # Sanitize identifier for filename safety.
    identifier = "".join([c for c in identifier if c.isalnum() or c in "_-"])
    
    output_file = os.path.join(predictions_folder, f"prediction_{identifier}.txt")
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(prediction)
        
print(f"Saved individual predictions to folder: {predictions_folder}")


In [20]:
# Sample Inference Code
combined_text="""
Motion suggests life, and as such, motion is a dimension we add to our
designs to make them more dynamic and engaging. Animation is a special
type of design form which we have created to help us take static designs
into more media-rich and interactive contexts. Aspecific type of animated 
content that we frequently create is the animated logo. 

Animation allows logos, which have been defined as the “visual figureheads" 
of brands [25], to better integrate within videos, livestreams, websites,
and social media.

A well-executed animation can quickly engage an audience, introduce the 
brand or individual online, and elevate content to have more visual interest.

Authoring an animated logo is challenging. Logos are often more than just a 
pairing of icon with text. Because they can have different layouts, layers,
color, and typography, they can take on great variety and be complex artifacts
to animate.

For a novice designer, it can be difficult to understand which design elements 
should be animated, in what sequence, and how to build up compelling and
believable motion. There are many facets of motion to consider such as speed,
timing, positioning, duration, easing, and motion personality
(e.g. a playful bounce vs. a strong entrance). 

Additionally, when logos have more design elements, designers also have to
understand how groups of elements can synchronize to coordinate motion and 
orchestrate a visual flow. While there is a great demand for animated content, 
it is diffi cult for people outside of motion design to develop this kind of
expertise. 

Design tools such as Adobe Express, Canva, and Figma often provide 
solutions in the form of animated templates and au tomatic animation 
techniques [10, 12, 13]. Templates pre-populate logo layouts with animations
that users can customize. They il lustrate how users can apply motion presets
(e.g. slide, flicker, or fade) onto logo elements to create professional-looking
animations. However, templates do not always adapt to every use case. 

When users make edits (e.g. add/remove/replace elements) to customize logo
templates, they can easily break the seamless and professional look the
templates were originally packaged with. An alternative to templates are
automatic animation techniques, which globally apply rules and heuristics to
animate canvases [12]. 

For example, all elements on a page can be directed to slide in from one side 
or 1Video: https://youtu.be/Jo9opkMH7iY 2Project P
age: https://vivian-liu.com/#/logomotion sequentially fade into place.
While templates and automatic tech niques can get users to a starting point fast,
neither solution works with a recognition of the user’s content,
which is something that can be enabled by emerging technologies.
Large language models (LLMs) present the potential for content aware animation.

They can generate animation code that is specific to the design elements and their 
layout on the canvas. Code is a text representation that is often used to drive animation [18, 33, 53], because it can concisely specify how elements interact over time and space on a canvas. Because LLMs encode a vast amount of world knowledge, they can draw upon actions and activities related to the content being animated and generate a near infinite number of animations. This open-ended generative capacity can go beyond the scope of what templates, presets, and rule-based techniques usually cover. Recent advancements have made LLMs more multimodal, such that they can take in both text and image as inputs, and provide visually-grounded responses. This make LLMs more applicable in domains like animation where a visual understanding of the canvas matters. It opens upthepotentialforuserstoprovideimagesoftheir layout to an LLM and receive animations tailored to their layout and design elements. For example, if a novice designer wanted to animate a taxi, they could use an LLM to generate code to drive a taxi onto the canvas. This code could translate the taxi object along the x-axis before easing it into the center of the canvas to imply a stop-and-go motion befitting of taxis. In this paper, we present LogoMotion, an LLM-based method that automatically animates static layouts in a content-aware way. LogoMotion generates code in a two-stage approach involving visually-grounded program synthesis and program repair. The first stage introduces multimodal LLM operators that take in visual context andhandlethe1)constructionofatextrepresentationofthe canvas, 2) conceptual grouping of elements, and 3) implementation of animation code. The second stage of our approach introduces a technique for visually-grounded program repair, which helps LLMs check what they have generated against the original layout and debug differences in a targeted layer-wise fashion. Our contributions are as follows: • LogoMotion, an LLM system that uses visually-grounded code generation to automatically generate logo animations from a PDF. The system identifies the visual content in each layer, infers the primary and secondary elements, and cre ates groups of elements. Based on this, the system suggests a design concept (in text) and uses the LLM to generate ani mation code. Users can optionally improve the animation by editing or adding their own design concept. • Visually-grounded program repair, a mechanism that lets the LLM automatically detect and debug visual errors within its generated animation code, creating a feedback loop between LLM-generated code and its visual outputs. • Atechnical evaluation of 276 animations showing that com pared to Canva Magic Animate and an ablated version of the system (without stages for hierarchy analysis and de sign concept suggestions), the full pipeline of LogoMotion produces animations that are more content-aware. LogoMotion: Visually Grounded Code Generation for Content-Aware Animation Woodstock ’18, June 03-05, 2018, Woodstock, NY • Aqualitative evaluation of novice users showing that Logo Motion is able to quickly achieve their desired animation with minimal reprompting.
"""
test_prompt = (
     "Instruction: Generate a mind map in Mermaid syntax for the following textbook text. "
     "The mind map should capture the main topics and their subtopics clearly in a hierarchical structure. "
     "Ensure that there is exactly one central (root) node, which is the only root, and attach all other topics "
     "as subtopics of this central node. Do NOT include a 'root' node with a generic label; instead, use a relevant central topic. "
     "Also, do NOT include any theme directives such as %%{init: {\"theme\": \"default\"}}%%, any parentheses or any extraneous formatting. "
     "The mind map should be self-explanatory so that by reading it, a user can understand the key content and structure "
     "of the text document. Keep the Mermaid syntax minimal so it renders correctly, and be concise.\n\n"
     f"Input: {combined_text}\n"
     "Output:"
)

# Generate the mind map using the generation pipeline
sample_output = generator(test_prompt, max_new_tokens=300, do_sample=False)
generated_text = sample_output[0]["generated_text"]

# Extract the generated output portion after "Output:" marker (if present)
if "Output:" in generated_text:
    mindmap = generated_text.split("Output:")[1].strip()
else:
    mindmap = generated_text.strip()

print("=== Generated Mind Map ===")
print(mindmap)
print("==========================")


=== Generated Mind Map ===
mindmap
  Motion and Animation
    Motion suggests life
    Animation is a special type of design form
    Animated logos for brands
    Challenges in authoring animated logos
      Design elements to animate
      Speed, timing, positioning, duration, easing, motion personality
      Synchronization of elements
    Solutions for novice designers
      Adobe Express, Canva, Figma
      Templates and automatic animation techniques
    Emerging technologies for content-aware animation
      Large language models (LLMs)
      Content-aware animation generation
    LogoMotion: An LLM-based method for content-aware animation
      Visually-grounded program synthesis
      Visually-grounded program repair
    Evaluation of LogoMotion
      Technical evaluation of 276 animations
      Qualitative evaluation of novice users
    Conclusion
      Potential for user-provided images for tailored animations
      Implications for the future of animation design
