# REST API Video Chunk Samples

## Objective
Process a video with GPT-4V sequentially, one fixed-length chunk at a time, carrying the scene breakdown from earlier chunks forward.

## Time

You should expect to spend 5-10 minutes running this sample.

## Before you begin

#### Installation

In [None]:
%pip install -r ../requirements.txt

### Parameters
You need to set a series of configurations such as GPT-4V_DEPLOYMENT_NAME, OPENAI_API_BASE, OPENAI_API_VERSION, VISION_API_ENDPOINT.

Set the environment variables `OPENAI_API_KEY` and `VISION_API_KEY` to \<Your API Key Value\> and \<Your VISION Key Value\>, respectively.
 <br>
      
      WINDOWS Users: 
         setx OPENAI_API_KEY "REPLACE_WITH_YOUR_KEY_VALUE_HERE"
         setx VISION_API_KEY "REPLACE_WITH_YOUR_KEY_VALUE_HERE"

      MACOS/LINUX Users: 
         export OPENAI_API_KEY="REPLACE_WITH_YOUR_KEY_VALUE_HERE"
         export VISION_API_KEY="REPLACE_WITH_YOUR_KEY_VALUE_HERE"

In [None]:
# --- Azure OpenAI settings ---------------------------------------------------
# Name of your GPT-4V deployment.
deployment_name: str = "<your GPT-4V deployment name>"
# Base URL of your Azure OpenAI resource, e.g. "https://<your resource name>.openai.azure.com"
openai_api_base: str = "<your resource base URL>"
# API version string. Versions follow the YYYY-MM-DD date structure (for example 2022-12-01);
# use one that your deployment supports.
openai_api_version: str = "<your OpenAI API version>"

# --- Vision settings ---------------------------------------------------------
# Base URL of your vision resource endpoint, e.g. "https://<your-resource-name>.cognitiveservices.azure.com"
vision_api_endpoint: str = "<your vision resource endpoint>"

# --- Video settings ----------------------------------------------------------
# SAS URL of the video to analyze, e.g.
# https://<your-storage-account-name>.blob.core.windows.net/<your-container-name>/<your-video-name>?<SAS-token>
video_SAS_url: str = (
    "https://gpt4vsamples.blob.core.windows.net/videos/"
    "Redwire%20Field%20Trip%20-%203D%20Printing%20a%20Zune.mkv"
)
# Name of the video index; it must be unique.
video_index_name: str = "zune-chunk-demo-index"
# ID of the video inside the index; it must be unique.
video_id: str = "zure-video-1"

# Flag read by the cleanup cell at the end of the notebook.
should_cleanup: bool = False

## Connect to your project
To start, create a config file with your project details. This file can be used by this sample and by other samples to connect to your workspace.

In [None]:
import json
from pathlib import Path

# Destination of the shared config file: one directory above the working directory.
p = Path("..") / "config.json"

# Connection settings taken from the parameters cell, keyed by setting name.
config = {
    "GPT-4V_DEPLOYMENT_NAME": deployment_name,
    "OPENAI_API_BASE": openai_api_base,
    "OPENAI_API_VERSION": openai_api_version,
    "VISION_API_ENDPOINT": vision_api_endpoint,
}

# Serialize the settings as JSON, overwriting any existing file at that path.
with p.open(mode="w") as file:
    json.dump(config, file)

## Run this Example

In [None]:
import os
import re
import sys
import time
from typing import Optional

import requests
from moviepy.editor import VideoFileClip

# Put the parent of the working directory on the import path
# (presumably where shared_functions lives -- verify against the repo layout).
parent_dir = Path.cwd().parent
sys.path.append(f"{parent_dir}")
from shared_functions import call_GPT4V_video, process_video_indexing

# Read the vision resource key from the environment; this is None when
# VISION_API_KEY is not set.
vision_api_key = os.environ.get("VISION_API_KEY")

### Create Video Index


In [None]:
# Index creation is a one-time step: run this cell only once per index.
process_video_indexing(
    vision_api_endpoint,
    vision_api_key,
    video_index_name,
    video_SAS_url,
    video_id,
)

### Call GPT-4V API with Video Index

In [None]:
# Call the GPT-4V API with the video index on each video chunk sequentially


def download_video(sas_url: str, local_file_path: str, timeout: float = 30.0) -> bool:
    """Download the video at ``sas_url`` and save it to ``local_file_path``.

    The response body is streamed to disk in 8192-byte chunks, so the whole
    video is never held in memory at once.

    Args:
        sas_url: URL of the video to fetch.
        local_file_path: Path of the file that is (over)written with the
            downloaded bytes.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        True if the server answered with status 200 and the body was written;
        False on any other status code or on any error. Errors are printed,
        not raised.
    """
    try:
        # Using the response as a context manager releases the streamed
        # connection on every exit path, including a failed disk write.
        with requests.get(sas_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Download failed with status code: {response.status_code}")
                return False

            with Path(local_file_path).open("wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
            return True
    except Exception as e:
        # Best-effort by design: callers only look at the boolean result.
        print(f"An error occurred during download: {e}")
        return False


def get_video_length(file_path: str) -> Optional[float]:
    """Return the duration of the video at ``file_path``, in seconds.

    Args:
        file_path: Path of a local video file that ``VideoFileClip`` can open.

    Returns:
        The clip's ``duration`` attribute (typically a float number of
        seconds), or None if the file could not be opened or read. The error
        is printed, not raised.
    """
    try:
        # The context manager closes the clip once the duration has been read.
        with VideoFileClip(file_path) as video:
            return video.duration
    except Exception as e:
        print(f"Error in getting video length: {e}")
        return None


# Driver for the chunked analysis: download the video once to measure its
# length, then query GPT-4V segment by segment, feeding each answer into the
# next prompt so the scene breakdown accumulates.

# Connection details for the vision resource, passed to call_GPT4V_video.
vision_api_config = {"endpoint": vision_api_endpoint, "key": vision_api_key}

# Location of the video and the index it was ingested into.
video_config = {
    "video_SAS_url": video_SAS_url,
    "video_index_name": video_index_name,
}

# Define the number of seconds for each segment
chunk_size = 120  # seconds
# Download the video; the local copy is only needed to read its duration.
local_file_path = "downloaded_video.mp4"
if download_video(video_SAS_url, local_file_path):
    video_length = get_video_length(local_file_path)
    Path(local_file_path).unlink()  # Delete the downloaded video

    if video_length is not None:
        print(f"Video Length: {video_length} seconds")
        sys_message = f"""
        The total length of the video is {video_length}s. Your job is to analyze a {chunk_size}-
        sec segment of the video and 20 frames from that segment. You will then provide a Current Scene Breakdown of the 
        video so far. Scenes must cover the entire video and non-overlapping. This breakdown should be a JSON object, with 
        each scenes being a key, and the value being an array of information about the scene, including topic, visual description,
        start and end times formated MM:SS.
        """
        # Count the whole chunks, plus one more only when a partial chunk
        # remains. Unconditionally adding one produced an empty trailing
        # segment (start == end) whenever the length was an exact multiple
        # of chunk_size.
        number_of_segments = int(video_length // chunk_size)
        if video_length % chunk_size:
            number_of_segments += 1
        updated_response = ""
        for i in range(number_of_segments):
            start_time = i * chunk_size
            # The last segment is clipped to the end of the video.
            end_time = min((i + 1) * chunk_size, video_length)
            user_prompt = f"How many scenes from {start_time}s to {end_time}s?"
            print(f"Segment {i+1}: {user_prompt}")
            if i > 0:
                # Carry the previous answer forward. The leading space keeps
                # this text from running into the question above.
                user_prompt += f""" And here are scenes in the previous segments: {updated_response}. 
                                You need to combine the scenes in the previous segments with the scenes in this segment and provide a summary.
                                """
            messages = [
                {"role": "system", "content": [{"type": "text", "text": sys_message}]},
                {"role": "user", "content": [{"type": "acv_document_id", "acv_document_id": video_id}]},
                {"role": "user", "content": [{"type": "text", "text": user_prompt}]},
            ]

            response = call_GPT4V_video(messages, vision_api=vision_api_config, video_index=video_config)
            # NOTE(review): assumes a successful, chat-completions-shaped
            # payload; an error payload would raise KeyError here.
            updated_response = response["choices"][0]["message"]["content"]
            print(f"Response for segment {i+1}: {updated_response}")
            time.sleep(2)  # Avoid throttling

        # Print the final response, split at whitespace that follows "." or
        # "?". The look-behinds skip abbreviation-like patterns such as
        # "e.g." and "Mr.".
        sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", updated_response)
        for sentence in sentences:  # Print the content of the response
            print(sentence)
    else:
        print("Failed to process video length.")
else:
    print("Failed to download video.")

## Cleaning up

To clean up the Azure resources used in this example, delete the individual resources you created in this tutorial.

If you made a resource group specifically to run this example, you could instead [delete the resource group](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/delete-resource-group).

In [None]:
# Placeholder: no cleanup is implemented yet, so this cell does nothing even
# when should_cleanup is True.
if should_cleanup:
    # TODO: Add resource cleanup
    pass