In [None]:
# Installation
!pip install --upgrade --quiet google-genai

In [None]:
# API Key setup
from google.colab import userdata
# Read the Gemini API key from the Colab secret stored under the name 'Apikey'.
GEMINI_API_KEY: str = userdata.get('Apikey')

# Report only whether a non-empty key came back; the key itself is never printed.
print("API Key found" if GEMINI_API_KEY else "API Key not found")

API Key found


In [None]:
# Initialize and configure the client
# Select the model
from google import genai
from google.genai import Client

# Model identifier passed to every generate_content call in this notebook.
model: str = "gemini-2.0-flash-exp"

# Single shared API client, authenticated with the key loaded earlier.
client: Client = genai.Client(api_key=GEMINI_API_KEY)

In [15]:
import time

def upload_video(video_file_name, poll_interval=10, timeout=None):
  """Upload a video through the Files API and wait until it has been processed.

  Args:
    video_file_name: Path of the local video file to upload.
    poll_interval: Seconds to sleep between status checks while the file is
      in the "PROCESSING" state.
    timeout: Maximum number of seconds to wait for processing to finish, or
      None (the default) to wait indefinitely.

  Returns:
    The file object returned by the Files API once it has left the
    "PROCESSING" state.

  Raises:
    ValueError: If processing ends in the "FAILED" state, or the uploaded
      file has no name to poll with.
    TimeoutError: If `timeout` is set and processing does not finish in time.
  """
  try:
    # Newer google-genai releases name this keyword `file`.
    video_file = client.files.upload(file=video_file_name)
  except TypeError:
    # Fallback for older releases, where the keyword was `path`.
    # NOTE(review): confirm against the SDK version pinned for this notebook.
    video_file = client.files.upload(path=video_file_name)

  deadline = None if timeout is None else time.monotonic() + timeout
  while video_file.state == "PROCESSING":
    if deadline is not None and time.monotonic() >= deadline:
      raise TimeoutError(
          f'Video {video_file_name!r} was still processing after {timeout} seconds.')
    if not video_file.name:
      # Without a name there is nothing to poll; fail instead of querying "".
      raise ValueError(
          f'Uploaded file for {video_file_name!r} has no name; cannot poll its status.')
    print('Waiting for video to be processed.')
    time.sleep(poll_interval)
    video_file = client.files.get(name=video_file.name)

  if video_file.state == "FAILED":
    raise ValueError(
        f'Video processing failed for {video_file_name!r} (state: {video_file.state}).')
  print(f'Video processing complete: {video_file.uri or ""}')

  return video_file

# Location of the clip inside the Colab runtime; change this one constant to
# analyze a different video.
VIDEO_PATH = '/content/abu.mp4'

intro_video = upload_video(VIDEO_PATH)


Waiting for video to be processed.
Video processing complete: https://generativelanguage.googleapis.com/v1beta/files/mk4d4hje7sgb


In [16]:
from IPython.display import display, Markdown
from google.genai.types import Content, Part
# Text prompt sent to the model together with the uploaded video. It holds six
# themed groups of two questions each (context, visuals, audio, scene
# transitions, speaker attributes, comprehension). The string is passed to the
# API verbatim, so edits here change what the model is asked.
prompt = """Contextual Analysis

"What is the tone of the speaker in the video?"
"What emotions are conveyed by the person in the video?"
Visual Insights

"Can you describe the colors and patterns visible in the background?"
"Are there any noticeable objects or items in the scene, and what are they?"
Audio Details

"Is there any background noise or music in the video?"
"Are there any non-verbal sounds, like laughter or sighs?"
Scene Transitions

"How many distinct scenes are present in the video?"
"Are there any noticeable changes in lighting or camera angles between scenes?"
Speaker Attributes

"How would you describe the speaker’s appearance, such as clothing and hairstyle?"
"Does the speaker exhibit any gestures or body language that stand out?"
Comprehension Check

"What is the overall message or purpose of the video?"
"Does the content suggest any specific context, like a professional or casual setting?"

         """

# Keep the `video` alias so any other cell that refers to it still works.
video = intro_video

# Fail fast with a clear message instead of sending empty strings to the API
# when the uploaded file carries no URI or MIME type.
if not video.uri or not video.mime_type:
    raise ValueError(
        "Uploaded video is missing its URI or MIME type; re-run the upload cell.")

# Send the clip and the questions as ONE user turn (video part first, then the
# text prompt) rather than as two separate items in `contents`.
response = client.models.generate_content(
    model=model,
    contents=[
        Content(
            role="user",
            parts=[
                Part.from_uri(file_uri=video.uri, mime_type=video.mime_type),
                Part(text=prompt),
            ],
        ),
    ],
)

# NOTE(review): response.text may be empty/None when the model returns no text
# part (for example a blocked response) - confirm against the SDK docs. Show an
# explicit message in that case instead of rendering nothing useful.
display(Markdown(response.text or "_The model returned no text for this request._"))

Okay, let's break down this video based on your questions:

**Contextual Analysis**

*   **Tone of the Speaker:** The speaker's tone is neutral and matter-of-fact. He's providing information in a straightforward manner without much emotional inflection. 
*   **Emotions Conveyed:** There aren't strong emotions being conveyed. The speaker seems calm and perhaps slightly reserved. There's a subtle smile at the end, which might suggest a hint of friendliness.

**Visual Insights**

*   **Colors and Patterns:** The background is a mix of muted tones. We see a pale yellow wall, a brown door frame, and a striking red curtain with a busy, ornate pattern in gold or yellow. The curtain appears to be a prominent design element.
*   **Noticeable Objects:**
    *   A refrigerator or freezer, likely made of metal, is visible on the left side of the frame.
    *   A door frame, potentially leading to another room, is partially visible.
    *   A curtain or tapestry with a detailed, red and yellow pattern is on the right.
    *   The speaker appears to be wearing a jacket and sweater.

**Audio Details**

*   **Background Noise or Music:** There is no noticeable background noise or music. The audio is primarily the speaker's voice.
*   **Non-verbal Sounds:** There are no discernible non-verbal sounds such as laughter or sighs. The audio is clear and focused on the speaker's words.

**Scene Transitions**

*   **Distinct Scenes:** There is only one scene in this video. The camera remains in the same position throughout.
*   **Changes in Lighting or Camera Angles:** There are no changes in lighting or camera angles. The scene is consistent throughout.

**Speaker Attributes**

*   **Appearance:** The speaker is a young man with dark hair and a mustache. He is wearing glasses, a green sweater, and a dark, green/teal-colored jacket. His hairstyle is neat and slightly parted to the side.
*   **Gestures or Body Language:** The speaker primarily maintains a stationary position with minimal movement. He is not particularly expressive with his body language. At the end of the video, he shows a brief smile, which is the only notable gesture.

**Comprehension Check**

*   **Overall Message or Purpose:** The purpose of the video appears to be the speaker introducing himself, stating his name, age, and educational background. It's a brief self-introduction.
*   **Specific Context:** The setting appears to be casual. The speaker seems to be filming in what is likely his home. There is no specific formal context apparent, making it seem like a simple, informal recording.

If you have another video you'd like analyzed, feel free to ask!
