In [1]:
import boto3
import json

# 1. Configuration
# Note: Check 'aws bedrock list-foundation-models' for the exact ID.
# As of Dec 2025, it is likely 'amazon.nova-lite-v1:0' or 'amazon.nova-lite-v2:0'.
MODEL_ID = "amazon.nova-2-lite-v1:0" 
VIDEO_PATH = "../datasets/explore/videos/video_1/S1940E01-Scene-012.mp4"
PROMPT = "Describe what is happening in this video in one sentence."

def analyze_video(video_path, model_id, text_prompt):
    # Initialize Bedrock Runtime
    client = boto3.client("bedrock-runtime", region_name="us-east-1")

    # 2. Read Video File
    try:
        with open(video_path, "rb") as f:
            video_bytes = f.read()
    except FileNotFoundError:
        print(f"Error: File {video_path} not found.")
        return

    # 3. Construct the Message Payload (Converse API)
    # The 'content' list can contain text, images, documents, and videos.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "video": {
                        "format": "mp4",  # Supported: mp4, mov, mkv, webm
                        "source": {
                            "bytes": video_bytes
                        }
                    }
                },
                {
                    "text": text_prompt
                }
            ]
        }
    ]

    # 4. Invoke the Model
    try:
        response = client.converse(
            modelId=model_id,
            messages=messages,
            inferenceConfig={
                "maxTokens": 2000,
                "temperature": 0.1,
                "topP": 0.9
            }
        )

        # 5. Parse Output
        output_text = response['output']['message']['content'][0]['text']
        print("-" * 30)
        print("Model Response:")
        print(output_text)
        print("-" * 30)
        
        # Optional: Print usage metrics
        token_usage = response['usage']
        print(f"Input Tokens: {token_usage['inputTokens']}")
        print(f"Output Tokens: {token_usage['outputTokens']}")

    except Exception as e:
        print(f"Inference Error: {e}")

if __name__ == "__main__":
    analyze_video(VIDEO_PATH, MODEL_ID, PROMPT)

Inference Error: An error occurred (ValidationException) when calling the Converse operation: Invocation of model ID amazon.nova-2-lite-v1:0 with on-demand throughput isn’t supported. Retry your request with the ID or ARN of an inference profile that contains this model.


In [6]:
import boto3
from botocore.config import Config
import base64

def to_b64(path: str) -> str:
    with open(path, "rb") as f:
        # return base64.b64encode(f.read()).decode("utf-8")
        return f.read()

video_path = "../datasets/explore/videos/video_1/S1940E01-Scene-012.mp4"

# Create the Bedrock Runtime client, using an extended timeout configuration
# to support long-running requests.
bedrock = boto3.client(
    "bedrock-runtime",
    region_name="us-east-1",
    config=Config(read_timeout=3600),
)

# Invoke the model
response = bedrock.converse(
    modelId="us.amazon.nova-2-lite-v1:0",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "video": {
                        "format": "mp4",  # Supported: mp4, mov, mkv, webm
                        "source": {
                            "bytes": to_b64(video_path)
                        }
                    }
                },
                {"text": "Describe what is happening in this video in one sentence. Also include character emotion."}
            ],
        }
    ],
    # system=[{"text": "You are a children's book author."}],  # Optional
    # inferenceConfig={  # These parameters are optional
    #     "maxTokens": 1500,
    #     "temperature": 0.7,
    #     "topP": 0.9,
    #     "stopSequences": ["THE END"],
    # },
    # additionalModelRequestFields={  # These parameters are optional
    #     "inferenceConfig": {
    #         "topK": 50,
    #     }
    # },
)

# Extract the text response
content_list = response["output"]["message"]["content"]
for content in content_list:
    if "text" in content:
        print(content["text"])

A brown mouse is running on a red carpet with a determined expression.


In [8]:
from synthesis.nova2_lite_model import Nova2LiteModel

model = Nova2LiteModel()

video_path = "/Users/scottcui/projects/mv_synthesis/datasets/ds2/videos/E10-Jerry_And_The_Goldfish/100-00;06;35.000-00;06;39.500.mp4"

r = await model.generate_vibe_card_frames(video_path, fps=3, max_frames=10, reasoning_effort="medium")
print(r)

These are sequential video frames in chronological order.
Times: frame_0001.jpg@00:00.00, frame_0002.jpg@00:00.33, frame_0003.jpg@00:01.00, frame_0004.jpg@00:01.33, frame_0005.jpg@00:02.00, frame_0006.jpg@00:02.34, frame_0007.jpg@00:03.00, frame_0008.jpg@00:03.34, frame_0009.jpg@00:04.00, frame_0010.jpg@00:04.34
Scene Description: A distressed anthropomorphic cat (Tom) kneels beside a large boiling pot spilling red, flaming substance onto a red carpet. Tom covers his face with one paw while wide-eyed and open-mouthed, exhibiting panic. Nearby kitchen utensils (knife, spoon) and a small brown ball lie scattered on the floor against a plain green wall.  

Vibe: Frantic, alarmed, and desperate; Tom’s exaggerated expressions and the chaotic spill create a sense of urgent mishap.  

Key Words: Tom, pot, flames, spilled substance, kitchen knife, steam, distressed cat, boiling overflow  

Musical Aspects (If Any): Staccato visual pacing with rapid shifts in Tom’s expressions synchronized to i

In [9]:
r = await model.generate_vibe_card(video_path)
print(r)

Scene Description: A dark blue cat with white gloves and red ears sits on a red surface, reacting with surprise as a stream of red, sparkling energy erupts from a boiling pot nearby. The cat raises its paws defensively while a knife lies on the floor beside the pot.  

Vibe: Startled, anxious, suspenseful  

Key Words: Cat, Pot, Knife, Magical Energy, Startled, Defensive, Kitchen  

Musical Aspects (If Any): Visual rhythm suggests "Mickey Mousing" with synchronized sizzling sound effects accompanying the red energy’s movement, creating a sudden, staccato rhythm that matches the cat’s startled reaction.
