In [1]:
import json
import pandas as pd

import vertexai
from google.cloud import storage
from vertexai.generative_models import (
    GenerationConfig,
    GenerativeModel,
    HarmBlockThreshold,
    HarmCategory,
    Part,
    Content,
    GenerationResponse,    
)


In [2]:
# Google Cloud project ID; the value below stands in for the "[project-id]" placeholder
project_id = 'lab-project-426319'
# Region name passed to the Vertex AI SDK
location = 'us-central1'

# Cloud Storage (gs://) URL of the video to analyze
video_file_url = "gs://video_demo_test/wakeup_princess.mp4"

In [3]:
# Instruction text sent to Gemini together with the video. It asks for the 10
# best scene changes (ad-placement points) spread across the whole movie and
# lists the fields to report for each one.
# NOTE(review): the field names listed below are expected to match the keys of
# `response_schema`; keep the two in sync when adding or removing a field.
prompt = '''

       I have a video that I need you to analyze for ad placement by detecting scene changes, 
       also known as shot boundaries. I need to identify the 10 best scene changes across the 
       entire movie, which are the best potential points for ad placement as they minimize 
       interruptions for viewers. These scene changes should be selected from all parts of the movie: 
       the beginning, middle, and the very end. Make sure you distribute the selected scenes evenly across 
       the entire movie.
       For each of these scene changes, please provide:

        timestamp: The exact timestamps indicating where the scene change occurs. Make sure that the timestamp of scenes are matched those in the original movie,
        reflecting its position accurately. The timestamps must exactly match those in the original movie.
        
        reason: The reason why this is a scene change and why it is a good location for ad placement. the reason 
        should be very specific. Summarize the story after and before the scene and the explain why 
        between these two scene is a good place for an ad.
        
        summary: A brief summary of the scene before the change.
        
        transition_feeling: The main feeling that the transition makes in viewers like excitement, peace, fear, etc.
        
        transition_type: The method used to switch from one scene to another like cuts, fades, dissolves, etc.
        
        narrative_type: The main role or significance of the scene in the storyline like pivotal, climatic, conflict, etc.
        
        dialogue_intensity: The amount and intensity of dialogue in the scene like monologue, dialogue, narration, debate, etc.

        characters_type: The types of the most important character involved in the scene transition like protagonist, antagonist, supporting, etc.
        
        scene_categories:  Classification of the scene before the change into the categories such as action, drama, comedy, etc.
      
      '''      

# Specify the structure of the output from Gemini: a JSON array with one
# object per selected scene change.
#
# Every field the prompt asks for is declared here. A field that is requested
# in the prompt but missing from `properties` (as `summary` previously was)
# cannot appear in the schema-constrained output, and a field that is declared
# but not listed in `required` (as `dialogue_intensity` previously was) may be
# omitted by the model, leaving gaps in the resulting table.
response_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "timestamp": {"type": "string"},
            "reason": {"type": "string"},
            "summary": {"type": "string"},
            "transition_feeling": {"type": "string"},
            "transition_type": {"type": "string"},
            "narrative_type": {"type": "string"},
            "dialogue_intensity": {"type": "string"},
            "characters_type": {"type": "string"},
            "scene_categories": {"type": "string"},
        },
        # All requested fields are mandatory so every row is complete.
        "required": [
            "timestamp",
            "reason",
            "summary",
            "transition_feeling",
            "transition_type",
            "narrative_type",
            "dialogue_intensity",
            "characters_type",
            "scene_categories",
        ],
    },
}



# Point the Vertex AI SDK at the configured project and region
vertexai.init(
    project=project_id,
    location=location,
)

# Gemini model used for the analysis
# NOTE(review): this is a pinned model version — confirm it is still available.
model_id = "gemini-1.5-pro-001"
model = GenerativeModel(model_id)

# Gemini configuration: temperature 0, JSON output shaped by response_schema
generation_config = GenerationConfig(
    temperature=0,
    response_mime_type="application/json",
    response_schema=response_schema,
)

In [5]:
# Wrap the video URL as an MP4 request part
video_file = Part.from_uri(
    video_file_url,
    mime_type="video/mp4",
)

# Gemini input: the video part followed by the text prompt
contents = [video_file, prompt]

# Send the request using the generation configuration defined earlier
response = model.generate_content(
    contents,
    generation_config=generation_config,
)

In [6]:
# Parse the text of the Gemini response as JSON
json_response = json.loads(response.text)

In [7]:
# Convert the parsed JSON into a DataFrame
df_response = pd.DataFrame(data=json_response)

In [9]:
# Preview the first five rows of the result
df_response.head(n=5)