<h1 align ="center"> REST API Video Chunk Samples</h1>
<hr>
   
# Chat Completions

In [1]:
import json
import os
import requests
import time
import re
from moviepy.editor import VideoFileClip
%run shared_functions.ipynb

### Setup Parameters


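Here we load the configuration from the _config.json_ file to set up `vision_api_endpoint`, `video_index_name`, and `video_id`. The `vision_api_key` is read from the `VISION_API_KEY` environment variable, and `video_SAS_url` is set directly in the cell below.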

In [2]:
# Load the non-secret settings from config.json.
# JSON is read as UTF-8 explicitly so the result does not depend on the platform's default encoding.
with open(r'config.json', encoding='utf-8') as config_file:
    config_details = json.load(config_file)

# The vision resource key is read from the VISION_API_KEY environment variable.
# os.getenv returns None when the variable is missing, so fail here with a clear
# message instead of letting a later API call fail with an opaque error.
vision_api_key = os.getenv("VISION_API_KEY")
if not vision_api_key:
    raise RuntimeError(
        "The VISION_API_KEY environment variable is not set; "
        "set it to your vision resource key before running this notebook."
    )

# The base URL for your vision resource endpoint, e.g. "https://<your-resource-name>.cognitiveservices.azure.com"
# You must create your resource in the East US region.
vision_api_endpoint = config_details['VISION_API_ENDPOINT']

# Insert your video SAS URL, e.g. https://<your-storage-account-name>.blob.core.windows.net/<your-container-name>/<your-video-name>?<SAS-token>
# This sample uses a fixed video URL; to use your own video, replace it with config_details["VIDEO_SAS_URL"].
video_SAS_url = "https://gpt4vsamples.blob.core.windows.net/videos/Redwire%20Field%20Trip%20-%203D%20Printing%20a%20Zune.mkv"

# This index name must be unique
video_index_name = config_details["VIDEO_INDEX_NAME"]

# This video ID must be unique
video_id = config_details["VIDEO_INDEX_ID"]

### Create Video Index


In [6]:
# Creates the video index from the settings loaded above.
# This cell only needs to be run once per index.
process_video_indexing(
    vision_api_endpoint,
    vision_api_key,
    video_index_name,
    video_SAS_url,
    video_id,
)

### Call GPT-4V API with Video Index

In [5]:
#  Call GPT-4V API with Video Index on Each Video Chunk Sequentially

def download_video(sas_url, local_file_path, timeout=(10, 60)):
    """Stream the video at `sas_url` into `local_file_path`.

    Args:
        sas_url: URL of the video to download.
        local_file_path: Destination path; it is opened in binary write mode.
        timeout: Passed to `requests.get` as its `timeout` argument
            ((connect, read) seconds), so a stalled server cannot hang the
            notebook indefinitely.

    Returns:
        True when the server answered 200 and the whole body was written,
        False otherwise. Failures are reported with `print` rather than raised.
    """
    file_created = False
    try:
        # The context manager releases the streamed connection on every exit path.
        with requests.get(sas_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Download failed with status code: {response.status_code}")
                return False
            with open(local_file_path, 'wb') as file:
                file_created = True
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty chunks
                        file.write(chunk)
        return True
    except Exception as e:
        print(f"An error occurred during download: {e}")
        # Only remove a file this call created, so a truncated download is
        # never mistaken for a complete video and pre-existing files are kept
        # when the request itself fails.
        if file_created and os.path.exists(local_file_path):
            try:
                os.remove(local_file_path)
            except OSError as cleanup_error:
                print(f"Could not remove partial download: {cleanup_error}")
        return False

def get_video_length(file_path):
    """Return the `duration` of the video at `file_path`, or None on failure.

    The file is opened with `VideoFileClip` inside a `with` block so the clip
    is closed once its duration has been read. Any exception is reported with
    `print` and turned into a None result instead of propagating.
    """
    try:
        with VideoFileClip(file_path) as clip:
            length = clip.duration
    except Exception as error:
        print(f"Error in getting video length: {error}")
        return None
    return length

# Vision resource settings, handed to call_GPT4V_video as `vision_api`
vision_api_config = dict(endpoint=vision_api_endpoint, key=vision_api_key)

# Video settings, handed to call_GPT4V_video as `video_index`
video_config = dict(video_SAS_url=video_SAS_url, video_index_name=video_index_name)

# Length of each video segment queried separately, in seconds
chunk_size = 20 # seconds
# Temporary local copy of the video, used only to measure its duration
local_file_path = "downloaded_video.mp4"
if download_video(video_SAS_url, local_file_path):
    try:
        video_length = get_video_length(local_file_path)
    finally:
        # Remove the temporary copy even if probing is interrupted
        os.remove(local_file_path)

    if video_length is not None:
        print(f"Video Length: {video_length} seconds")
        sys_message = f"""
        The total length of the video is {video_length}s. Your task is to assist in finding all scenes in this video.
        You also need to describe each scene with start and end time. 
        """
        # Count full segments plus one shorter trailing segment only when there is
        # a remainder. Always adding one extra segment would produce an empty
        # "Ns to Ns" query whenever the length is an exact multiple of chunk_size.
        full_segments, remainder = divmod(video_length, chunk_size)
        number_of_segments = int(full_segments) + (1 if remainder > 0 else 0)
        # Model answer from the previous segment; it is fed into the next prompt
        # so the final answer accumulates the scenes of the whole video.
        updated_response = ""
        for i in range(number_of_segments):
            start_time = i * chunk_size
            end_time = min((i + 1) * chunk_size, video_length)
            user_prompt = f"How many scenes from {start_time}s to {end_time}s?"
            print(f"Segment {i+1}: {user_prompt}")
            if i > 0:
                # Leading space keeps the follow-up text from running into the question mark
                user_prompt += f""" And here are scenes in the previous segments: {updated_response}. 
                                You need to combine the scenes in the previous segments with the scenes in this segment and provide a summary.
                                """
            messages = [
                {"role": "system", "content": [{"type": "text", "text": sys_message}]},
                {"role": "user", "content": [{"type": "acv_document_id", "acv_document_id": video_id}]},
                {"role": "user", "content": [{"type": "text", "text": user_prompt}]}
            ]

            response = call_GPT4V_video(messages, vision_api=vision_api_config, video_index=video_config)
            # NOTE(review): assumes the response has a choices[0].message.content
            # shape; confirm how call_GPT4V_video reports API errors.
            updated_response = response['choices'][0]['message']['content']
            time.sleep(2) # Avoid throttling

        # Print the final response one sentence per line
        sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', updated_response)
        for sentence in sentences:  # Print the content of the response
            print(sentence)
    else:
        print("Failed to process video length.")
else:
    print("Failed to download video.")

Video Length: 437.28 seconds
Segment 1: How many scenes from 0s to 20s?
Segment 2: How many scenes from 20s to 40s?
Segment 3: How many scenes from 40s to 60s?
Segment 4: How many scenes from 60s to 80s?
Segment 5: How many scenes from 80s to 100s?
Segment 6: How many scenes from 100s to 120s?
Segment 7: How many scenes from 120s to 140s?
Segment 8: How many scenes from 140s to 160s?
Segment 9: How many scenes from 160s to 180s?
Segment 10: How many scenes from 180s to 200s?
Segment 11: How many scenes from 200s to 220s?
Segment 12: How many scenes from 220s to 240s?
Segment 13: How many scenes from 240s to 260s?
Segment 14: How many scenes from 260s to 280s?
Segment 15: How many scenes from 280s to 300s?
Segment 16: How many scenes from 300s to 320s?
Segment 17: How many scenes from 320s to 340s?
Segment 18: How many scenes from 340s to 360s?
Segment 19: How many scenes from 360s to 380s?
Segment 20: How many scenes from 380s to 400s?
Segment 21: How many scenes from 400s to 420s?
Seg