# In [None]:
import os
import time

import requests

# Configuration
# Each endpoint, key and URL below can be overridden by an environment variable
# of the same name, so real credentials need not be written into this file.
# The string literals are the fallbacks; the key and SAS-URL fallbacks are
# placeholder text that must be replaced (or overridden) before running.
GPT_4V_ENDPOINT = os.environ.get(
    "GPT_4V_ENDPOINT",
    "https://mohammedzeeshan-aiservices1332475481.openai.azure.com/openai/deployments/gpt-4/extensions/chat/completions?api-version=2023-07-01-preview",
)
GPT_4V_KEY = os.environ.get("GPT_4V_KEY", "Azure openAI GPT-4V key")
# Stored without a trailing slash: the code below appends "/computervision..."
# to this value, and a trailing slash would produce "//computervision".
VISION_API_ENDPOINT = os.environ.get(
    "VISION_API_ENDPOINT",
    "https://mohammedzeeshan-aiservices1332475481.openai.azure.com",
).rstrip("/")
VISION_API_KEY = os.environ.get("VISION_API_KEY", "Azure Computer Vision API key")

## Ingest the video
VIDEO_FILE_SAS_URL = os.environ.get("VIDEO_FILE_SAS_URL", "SAS URL to the video file")
VIDEO_INDEX_NAME = "pgybjvahgbl"
VIDEO_DOCUMENT_ID = "AOAIChatDocument"

def create_video_index(vision_api_endpoint, vision_api_key, index_name):
    """Send the PUT request that creates a video retrieval index.

    Args:
        vision_api_endpoint: Base URL of the Computer Vision resource. A
            trailing slash is tolerated.
        vision_api_key: Subscription key, sent in the
            ``Ocp-Apim-Subscription-Key`` header.
        index_name: Name of the index, used as the last URL path segment.

    Returns:
        The ``requests.Response`` of the PUT call. The status code is not
        checked here; callers must inspect it themselves.

    Raises:
        requests.RequestException: On connection failure or when the service
            does not answer within the timeout.
    """
    # Drop any trailing "/" so the joined URL never contains "//computervision".
    base_url = vision_api_endpoint.rstrip("/")
    url = f"{base_url}/computervision/retrieval/indexes/{index_name}?api-version=2023-05-01-preview"
    headers = {"Ocp-Apim-Subscription-Key": vision_api_key, "Content-Type": "application/json"}
    data = {
        "features": [
            {"name": "vision", "domain": "surveillance"}
        ]
    }
    # `json=` makes requests serialize the body itself, so this function does
    # not depend on a separate `json` import. The timeout keeps a stalled
    # connection from blocking forever.
    return requests.put(url, headers=headers, json=data, timeout=30)

def add_video_to_index(vision_api_endpoint, vision_api_key, index_name, video_url, video_id,
                       ingestion_name="my-ingestion"):
    """Send the PUT request that starts ingesting one video into an index.

    Args:
        vision_api_endpoint: Base URL of the Computer Vision resource. A
            trailing slash is tolerated.
        vision_api_key: Subscription key, sent in the
            ``Ocp-Apim-Subscription-Key`` header.
        index_name: Name of the index that receives the video.
        video_url: URL of the video file, sent as ``documentUrl``.
        video_id: Identifier for the video, sent as ``documentId``.
        ingestion_name: URL path segment naming this ingestion. Defaults to
            ``"my-ingestion"``.

    Returns:
        The ``requests.Response`` of the PUT call. The status code is not
        checked here; callers must inspect it themselves.

    Raises:
        requests.RequestException: On connection failure or when the service
            does not answer within the timeout.
    """
    # Drop any trailing "/" so the joined URL never contains "//computervision".
    base_url = vision_api_endpoint.rstrip("/")
    url = (
        f"{base_url}/computervision/retrieval/indexes/{index_name}"
        f"/ingestions/{ingestion_name}?api-version=2023-05-01-preview"
    )
    headers = {"Ocp-Apim-Subscription-Key": vision_api_key, "Content-Type": "application/json"}
    data = {
        'videos': [{'mode': 'add', 'documentId': video_id, 'documentUrl': video_url}]
    }
    # `json=` makes requests serialize the body itself, so this function does
    # not depend on a separate `json` import. The timeout keeps a stalled
    # connection from blocking forever.
    return requests.put(url, headers=headers, json=data, timeout=30)

def wait_for_ingestion_completion(vision_api_endpoint, vision_api_key, index_name, max_retries=30,
                                  poll_interval=10):
    """Poll an index's ingestion list until the first listed ingestion finishes.

    Args:
        vision_api_endpoint: Base URL of the Computer Vision resource. A
            trailing slash is tolerated.
        vision_api_key: Subscription key, sent in the
            ``Ocp-Apim-Subscription-Key`` header.
        index_name: Name of the index whose ingestions are listed.
        max_retries: Maximum number of polls before giving up.
        poll_interval: Seconds to sleep before each poll.

    Returns:
        True once the first ingestion in the response reports ``'Completed'``.
        False if it reports ``'Failed'`` or if ``max_retries`` polls pass
        without completion.

    Raises:
        requests.RequestException: On connection failure or when a poll does
            not answer within the timeout.
    """
    # Drop any trailing "/" so the joined URL never contains "//computervision".
    base_url = vision_api_endpoint.rstrip("/")
    url = f"{base_url}/computervision/retrieval/indexes/{index_name}/ingestions?api-version=2023-05-01-preview"
    headers = {"Ocp-Apim-Subscription-Key": vision_api_key}
    for _ in range(max_retries):
        # Sleep first: the ingestion was only just submitted by the caller.
        time.sleep(poll_interval)
        response = requests.get(url, headers=headers, timeout=30)
        if response.status_code != 200:
            continue
        state_data = response.json()
        # An empty or missing list means nothing is reported yet; poll again
        # rather than fail with an IndexError or KeyError.
        ingestions = state_data.get('value') or []
        if not ingestions:
            continue
        state = ingestions[0].get('state')
        if state == 'Completed':
            print(state_data)
            print('Ingestion completed.')
            return True
        if state == 'Failed':
            # A failed ingestion will not complete later, so stop polling.
            print(state_data)
            print('Ingestion failed.')
            return False
    return False


# Step 1: create the index that will hold the video.
response = create_video_index(
    vision_api_endpoint=VISION_API_ENDPOINT,
    vision_api_key=VISION_API_KEY,
    index_name=VIDEO_INDEX_NAME,
)
print(f"{response.status_code} {response.text}")

# Step 2: submit the video file for ingestion into that index.
response = add_video_to_index(
    vision_api_endpoint=VISION_API_ENDPOINT,
    vision_api_key=VISION_API_KEY,
    index_name=VIDEO_INDEX_NAME,
    video_url=VIDEO_FILE_SAS_URL,
    video_id=VIDEO_DOCUMENT_ID,
)
print(f"{response.status_code} {response.text}")

# Step 3: block until the ingestion is reported complete or polling gives up.
# On failure only a message is printed; the script carries on regardless.
if not wait_for_ingestion_completion(
    vision_api_endpoint=VISION_API_ENDPOINT,
    vision_api_key=VISION_API_KEY,
    index_name=VIDEO_INDEX_NAME,
):
    print("Ingestion did not complete within the expected time.")


## Chat with GPT-4V

# Azure OpenAI authenticates this request with the "api-key" header.
headers = {
    "Content-Type": "application/json",
    "api-key": GPT_4V_KEY,
}

# Payload for the request: the video index as a data source, the video
# enhancement switched on, and the chat history to send.
payload = {
    "dataSources": [
        {
            "type": "AzureComputerVisionVideoIndex",
            "parameters": {
                # Strip any trailing "/" so the base URL never contains
                # "//computervision".
                "computerVisionBaseUrl": f"{VISION_API_ENDPOINT.rstrip('/')}/computervision",
                "computerVisionApiKey": VISION_API_KEY,
                "indexName": VIDEO_INDEX_NAME,
                "videoUrls": [VIDEO_FILE_SAS_URL],
            },
        }
    ],
    "enhancements": {
        "video": {
            "enabled": True
        }
    },
    "messages": [
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "You are an AI assistant that helps people find information."
                }
            ]
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "acv_document_id",
                    # Reuse the id the video was ingested under so the two
                    # cannot drift apart.
                    "acv_document_id": VIDEO_DOCUMENT_ID
                },
                {
                    "type": "text",
                    "text": "\n \nanalysis this video"
                }
            ]
        },
        # NOTE(review): the history ends with an assistant turn, presumably
        # copied from an earlier session; confirm it is meant to be sent.
        {
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    "text": "The video provided appears to show a static image of a \"HOME INSURANCE POLICY\" document over a period of 13 seconds. The frames from 00:00:00 to 00:00:12 show no noticeable changes or movements, suggesting that the video may be a still shot of the document on a desk with a notebook in the background. The person's hand is holding a pen, poised to write or fill in details on the insurance policy form. The document includes sections for personal details and policy details. The lack of motion suggests the video's purpose might be to focus on the document itself, possibly for informational or instructional use."
                }
            ]
        }
    ],
    "temperature": 0.7,
    "top_p": 0.95,
    "max_tokens": 800
}

# Send request
try:
    # (connect, read) timeouts in seconds; without them a stalled connection
    # would block the script indefinitely.
    response = requests.post(GPT_4V_ENDPOINT, headers=headers, json=payload, timeout=(10, 300))
    response.raise_for_status()  # Will raise an HTTPError if the HTTP request returned an unsuccessful status code
except requests.RequestException as e:
    raise SystemExit(f"Failed to make the request. Error: {e}")

# Handle the response as needed (e.g., print or process)
print(response.json())