# Commercial LLM Ground Truth

This notebook is meant to try and ground truth videos using Gemini 2.5

In [32]:
# Variables to tweak

# API Config

max_tokens = 99999

image_resize = 768

video_width, video_height = 1280, 720 

video_folder = './videos'
sequences_folder = './sequences' # Used to make the videos
output_dir = './outputs/ground_truths/'

#There is also the prompt messages which is later on in the file
message = '''This is demo only. not real. do your best. These frames are captured for a potential traffic incident. Give me quanitative information whenever possible. Give me the following and number each answer:
              Number of vehicles in accident in a number,
              Accident Type such as t-bone, rear end, etc,
              Person Injury yes or no,
              Need for ambulance yes or no,
              Need for firetruck yes or no,
              Need for Police yes or no,
              Types of vehicles involved, such as suv, truck, sedan,
              Fire yes or no,
              Day/night and weather, such as clear, etc,
              Low Res/Bad Footage yes or no.
              Please ignore any context before these images and this prompt
  '''


In [33]:
from IPython.display import display, clear_output, Markdown
import base64
from openai import OpenAI
import os
import cv2  # OpenCV is used for image encoding

import pathlib
import textwrap

from google import genai

import PIL.Image
from IPython.display import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
from PIL import Image
from IPython import display
import numpy as np
from typing import List
import json
from google.genai import types

In [34]:
#Turn sequences of images into a video

def make_video_from_images(image_folder: str, video_name: str, fps: int = 30) -> None:
    images = []
    for file in os.listdir(image_folder):
        if file.endswith(('.png', '.jpg', '.jpeg')):
            images.append(os.path.join(image_folder, file))

    #Sort the images by their filename as an integer (i.e. 0.jpg, 1.jpg, 2.jpg, ...)
    images = sorted(images, key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))

    if not images:
        print("No images found.")
        return

    # Create a video from the images
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(video_name, fourcc, fps, (video_width, video_height))

    for image in images:
        frame = cv2.imread(image)
        frame = cv2.resize(frame, (video_width, video_height))
        video.write(frame)

    video.release()
    print(f"Video saved as {video_name}")

for sequence in os.listdir(sequences_folder):
    print(f"Making video {os.path.join(video_folder, sequence) + '.mp4'}")
    if not os.path.exists(os.path.join(video_folder, sequence) + '.mp4'):
        make_video_from_images(os.path.join(sequences_folder, sequence), os.path.join(video_folder, f"{sequence}.mp4"), fps=60)
    else:
        print(f"Video {os.path.join(video_folder, sequence)} exists. Skipping video creation for this sequence.")

Making video ./videos\000000.mp4
Video ./videos\000000 exists. Skipping video creation for this sequence.
Making video ./videos\000001.mp4
Video ./videos\000001 exists. Skipping video creation for this sequence.
Making video ./videos\000002.mp4
Video ./videos\000002 exists. Skipping video creation for this sequence.
Making video ./videos\000003.mp4
Video ./videos\000003 exists. Skipping video creation for this sequence.
Making video ./videos\000004.mp4
Video ./videos\000004 exists. Skipping video creation for this sequence.
Making video ./videos\000005.mp4
Video ./videos\000005 exists. Skipping video creation for this sequence.
Making video ./videos\000006.mp4
Video ./videos\000006 exists. Skipping video creation for this sequence.
Making video ./videos\000007.mp4
Video ./videos\000007 exists. Skipping video creation for this sequence.
Making video ./videos\000008.mp4
Video ./videos\000008 exists. Skipping video creation for this sequence.
Making video ./videos\000009.mp4
Video ./video

In [35]:
#Gemini API Key setup
with open(".env") as f:
    gemini_api_key = f.read().strip().split('=')[1]

#Load the Gemini API key
client = genai.Client(api_key=gemini_api_key)

In [36]:
def get_gemini_response(prompt: str, video_path: str) -> str:
    """
    Sends a prompt and a video file to Gemini 2. and returns the response.

    Args:
        prompt (str): The prompt to send.
        video_path (str): Path to the video file.

    Returns:
        str: Gemini's response.
    """
    #Give me the size of the video in MB -- if bigger than 20 MB, print a warning
    video_size = os.path.getsize(video_path) / (1024 * 1024)  # Convert to MB
    if video_size > 20:
        print(f"Warning: Video size is {video_size:.2f} MB, which is larger than the recommended 20 MB.")
        return ""
    video_bytes = open(video_path, 'rb').read()

    #Gemini 2.5 wouldn't work due to the video size, so I ended up using 2.0 flash instead
    response = client.models.generate_content(
        model='models/gemini-2.0-flash',
        contents=types.Content(
            parts=[
                types.Part(
                    inline_data=types.Blob(data=video_bytes, mime_type='video/mp4')
                ),
                types.Part(text=prompt)
            ]
        )
    )
    return response.text

In [37]:
def process_videos(output_file: str) -> str:
    """
    Processes a video file and returns the response from Gemini.

    Args:
        video_path (str): Path to the video file.

    Returns:
        None
    """
    print(f"Processing and writing to {output_file}")
    responses = ""

    for video_file in os.listdir(video_folder):
        if video_file.endswith('.mp4'):
            video_path = os.path.join(video_folder, video_file)
            response = get_gemini_response(message, video_path)
            responses += f"Response for {video_file}:\n"
            responses += response + "\n\n"
            print(f"Processed {video_file} with response: {response}")

            # Save responses to a txt file
            output_file = pathlib.Path(output_file)
            with open(output_file, 'w') as f:
                f.write(responses)
    print(f"Responses saved to {output_file}")

In [38]:
for output_base in ["1", "2", "3", "4", "5"]:
    process_videos(os.path.join(output_dir, f"{output_base}.txt"))

Processing and writing to ./outputs/ground_truths/1.txt
Processed 000000.mp4 with response: Okay, I will analyze the provided frames and provide the information you requested.

1.  **Number of vehicles in accident:** 2
2.  **Accident Type:** Angle/Side Impact
3.  **Person Injury:** Yes
4.  **Need for ambulance:** Yes
5.  **Need for firetruck:** No
6.  **Need for Police:** Yes
7.  **Types of vehicles involved:** Motorcycle, Motorcycle
8.  **Fire:** No
9.  **Day/night and weather:** Day, clear
10. **Low Res/Bad Footage:** Yes

Processed 000001.mp4 with response: Okay, I will do my best based on the provided images and the prompt.

1.  **Number of vehicles in accident:** 3
2.  **Accident Type:** Appears to be a chain reaction rear-end collision.
3.  **Person Injury:** Possibly.  It is not directly visible if there are injuries, but the severity of the crash suggests the possibility.
4.  **Need for ambulance:** Possibly. Given the nature of the accident, the need for an ambulance cannot be

RemoteProtocolError: Server disconnected without sending a response.