From 90c4bb9e41621fc127a4c9b5e712a47a3f57e23c Mon Sep 17 00:00:00 2001 From: Gablans Date: Wed, 13 Aug 2025 06:24:49 +0000 Subject: [PATCH 1/9] Preview.2 support for Structured Video Chapters --- .gitignore | 2 +- .../video_chapters_structured.json | 58 +++++ notebooks/video_chapters_structured.ipynb | 245 ++++++++++++++++++ python/chapters_utility.py | 64 +++++ 4 files changed, 368 insertions(+), 1 deletion(-) create mode 100644 analyzer_templates/video_chapters_structured.json create mode 100644 notebooks/video_chapters_structured.ipynb create mode 100644 python/chapters_utility.py diff --git a/.gitignore b/.gitignore index 143fb90..695640a 100644 --- a/.gitignore +++ b/.gitignore @@ -122,7 +122,7 @@ celerybeat.pid *.sage.py # Environments -.env +/.env .venv env/ venv/ diff --git a/analyzer_templates/video_chapters_structured.json b/analyzer_templates/video_chapters_structured.json new file mode 100644 index 0000000..dd844bb --- /dev/null +++ b/analyzer_templates/video_chapters_structured.json @@ -0,0 +1,58 @@ +{ + "description": "test model", + "scenario": "videoShot", + "config": { + "returnDetails": true, + "enableSegmentation": true, + "enableFace": false, + "disableFaceBlurring": true, + "disableContentFiltering": true, + "segmentationMode": "custom", + "segmentationDefinition": "Segment the video into only three chapter types: 'Topic Introduction', 'Details About the Work Done', and 'Outcome, Conclusion and Results'. Reason about the content and determine the best time to segment the video according to these chapter types. Use the timestamp of each image to identify the start and end time of each chapter, and avoid chapter overlap.", + "locales": [ + "en-US" + ] + }, + "BaseAnalyzerId": "prebuilt-videoAnalyzer", + "fieldSchema": { + "name": "Content Understanding", + "description": "NO USE", + "fields": { + "Segments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "SegmentId": { "type": "string" }, + "SegmentType": { + "type": "string", + "method": "generate", + "description": "The chapter type for the segment" + }, + "Scenes": { + "type": "array", + "items": { + "type": "object", + "properties": { + "Description": { + "type": "string", + "method": "generate", + "description": "A 5 word description of the scene. A scene is a smaller segment of the segment where a continous block for storytelling unfolds within a specific time, place, and set of characters. A scene can only belong to a single chapter, and cannot overlap with other scenes. Scenes are sequential across the video." + }, + "StartTimestamp": { + "type": "string", + "description": "the start timestamp of the scene" + }, + "EndTimestamp": { + "type": "string", + "description": "the end timestamp of the scene" + } + } + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/notebooks/video_chapters_structured.ipynb b/notebooks/video_chapters_structured.ipynb new file mode 100644 index 0000000..aa15cf5 --- /dev/null +++ b/notebooks/video_chapters_structured.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "11049ef0", + "metadata": {}, + "source": [ + "# Video Chapters Generation" + ] + }, + { + "cell_type": "markdown", + "id": "beccbe11", + "metadata": {}, + "source": [ + "Generate video chapters based on Azure Content Understanding and Azure OpenAI." + ] + }, + { + "cell_type": "markdown", + "id": "0a44bdf4", + "metadata": {}, + "source": [ + "\n", + "## Pre-requisites\n", + "1. 
Follow [README](../README.md#configure-azure-ai-service-resource) to create essential resource that will be used in this sample.\n", + "1. Install required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3dfa60be", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -r ../requirements.txt" + ] + }, + { + "cell_type": "markdown", + "id": "fcefeaab", + "metadata": {}, + "source": [ + "## Load environment variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c69047b", + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "import os\n", + "\n", + "load_dotenv(dotenv_path=\".env\", override=True)\n", + "\n", + "AZURE_AI_SERVICE_ENDPOINT = os.getenv(\"AZURE_AI_SERVICE_ENDPOINT\")\n", + "AZURE_AI_SERVICE_API_VERSION = os.getenv(\"AZURE_AI_SERVICE_API_VERSION\", \"2025-05-01-preview\")\n", + "\n", + "AZURE_OPENAI_ENDPOINT = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n", + "AZURE_OPENAI_API_VERSION = os.getenv(\"AZURE_OPENAI_API_VERSION\", \"2024-08-01-preview\")\n", + "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME = os.getenv(\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\")" + ] + }, + { + "cell_type": "markdown", + "id": "7fe969de", + "metadata": {}, + "source": [ + "If you haven't done so, please authenticate by running **'az login'** through the terminal. This credentials are used to validate that you have access to the resources you defined above.\n", + "\n", + "Make sure you have Azure CLI installed on your system. To install --> curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ea18419", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "## This small routine checks Azure CLI login status\n", + "\n", + "import subprocess\n", + "\n", + "try:\n", + " subprocess.run(\"az account show\", check=True, shell=True)\n", + " print(\"Already logged in to Azure CLI.\")\n", + "except subprocess.CalledProcessError:\n", + " print(\"Not logged in. Please log in now...\")\n", + " subprocess.run(\"az login\", shell=True)\n" + ] + }, + { + "cell_type": "markdown", + "id": "78e7d414", + "metadata": {}, + "source": [ + "## File to Analyze\n", + "\n", + "Use the following variable to define what file to analyze. For this example, we will be examining a small tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c9fb2b0", + "metadata": {}, + "outputs": [], + "source": [ + "VIDEO_LOCATION = \"https://github.com/Azure-Samples/azure-ai-content-understanding-assets/raw/refs/heads/main/videos/learning/learning2.mp4\"" + ] + }, + { + "cell_type": "markdown", + "id": "57b9abf6", + "metadata": {}, + "source": [ + "## Create a custom analyzer and submit the video to generate chapters\n", + "The custom analyzer schema for this notebook is [video_chapters_structured.json](../analyzer_templates/video_chapters_structured.json). This file defines the schema and configuration for a custom video analyzer. It specifies how a video should be segmented into chapters and scenes, including three chapter types: \"Topic Introduction\", \"Details About the Work Done\", and \"Conclusion or Results\". Each segment contains a list of scenes, with each scene described by a short description, start timestamp, and end timestamp. 
The configuration section controls segmentation behavior and other analysis options, while the fieldSchema section outlines the expected structure of the output, ensuring chapters and scenes are clearly organized and non-overlapping.\n", + "\n", + "In this example, we will use the utility class `AzureContentUnderstandingClient` to load the analyzer schema from the template file and submit it to Azure Content Understanding service. Then, we will analyze the video and generate the desired chapter and scene structure.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40e52230", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from pathlib import Path\n", + "import json\n", + "import uuid\n", + "\n", + "\n", + "# add the parent directory to the path to use shared modules\n", + "parent_dir = Path(Path.cwd()).parent\n", + "sys.path.append(\n", + " str(parent_dir)\n", + ")\n", + "from python.content_understanding_client import AzureContentUnderstandingClient\n", + "\n", + "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n", + "credential = DefaultAzureCredential()\n", + "token_provider = get_bearer_token_provider(credential, \"https://cognitiveservices.azure.com/.default\")\n", + "\n", + "# The analyzer template is used to define the schema of the output\n", + "ANALYZER_TEMPLATE_PATH = \"../analyzer_templates/video_chapters_structured.json\"\n", + "ANALYZER_ID = \"video_scene_chapter\" + \"_\" + str(uuid.uuid4()) # Unique identifier for the analyzer\n", + "\n", + "# Create the Content Understanding (CU) client\n", + "cu_client = AzureContentUnderstandingClient(\n", + " endpoint=AZURE_AI_SERVICE_ENDPOINT,\n", + " api_version=AZURE_AI_SERVICE_API_VERSION,\n", + " token_provider=token_provider,\n", + " x_ms_useragent=\"azure-ai-content-understanding-python/video_chapters_structured\", # This header is used for sample usage telemetry, please comment out this line if you want to opt out.\n", + ")\n", + "\n", + "# Use the client to create an analyzer\n", + "response = cu_client.begin_create_analyzer(\n", + " ANALYZER_ID, analyzer_template_path=ANALYZER_TEMPLATE_PATH)\n", + "result = cu_client.poll_result(response)\n", + "\n", + "print(json.dumps(result, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "9b85cf38", + "metadata": {}, + "source": [ + "### Use the created analyzer to extract video content\n", + "It might take some time depending on the video length. 
Try with short videos to get results faster" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad31ab6a", + "metadata": {}, + "outputs": [], + "source": [ + "# Submit the video for content analysis\n", + "response = cu_client.begin_analyze(ANALYZER_ID, file_location=VIDEO_LOCATION)\n", + "\n", + "# Wait for the analysis to complete and get the content analysis result\n", + "video_cu_result = cu_client.poll_result(\n", + " response, timeout_seconds=3600) # 1 hour timeout for long videos\n", + "\n", + "# Print the content analysis result\n", + "print(f\"Video Content Understanding result: \", video_cu_result)\n", + "\n", + "# Optional - Delete the analyzer if it is no longer needed\n", + "cu_client.delete_analyzer(ANALYZER_ID)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55d1342f", + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import display, HTML\n", + "from python.chapters_utility import ChaptersFormatter\n", + "\n", + "full_html = ChaptersFormatter.format_chapters_output(VIDEO_LOCATION, video_cu_result)\n", + "display(HTML(full_html))" + ] + }, + { + "cell_type": "markdown", + "id": "1126b263", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python/chapters_utility.py b/python/chapters_utility.py new file mode 100644 index 0000000..cbf8cec --- /dev/null +++ b/python/chapters_utility.py @@ -0,0 +1,64 @@ + +class ChaptersFormatter: + """Formating Utility for Table of Contents""" + + def format_chapters_output(video_URL, video_cu_result): + """Formats the chapters output for the video.""" + + segments = ( + video_cu_result + .get("result", {}) + .get("contents", [])[0] + .get("fields", {}) + .get("Segments", {}) + .get("valueArray", []) + ) + + toc_html = "
<div style='font-family: Segoe UI, Arial, sans-serif;'>"
+        toc_html += "<h2>Table of Contents</h2><ul>"
+        for idx, segment in enumerate(segments):
+            seg_obj = segment.get("valueObject", {})
+            seg_type = seg_obj.get("SegmentType", {}).get("valueString", "Unknown")
+            toc_html += f"<li><b>{seg_type}</b><ul>"
+            scenes = seg_obj.get("Scenes", {}).get("valueArray", [])
+            for sidx, scene in enumerate(scenes):
+                scene_obj = scene.get("valueObject", {})
+                desc = scene_obj.get("Description", {}).get("valueString", "No description")
+                start = scene_obj.get("StartTimestamp", {}).get("valueString", "N/A")
+                h, m, s = [float(x) if '.' in x else int(x) for x in start.split(':')]
+                seconds = int(h) * 3600 + int(m) * 60 + float(s)
+                toc_html += (
+                    f"<li><a href='javascript:void(0)' onclick=\"document.getElementById('player').currentTime={seconds};"
+                    f"document.getElementById('player').play();\">{desc} ({start})</a></li>"
+                )
+            toc_html += "</ul></li>"
+        toc_html += "</ul></div>"
+
+        full_html = f"""
+        <div style="display:flex; gap:24px; align-items:flex-start;">
+            <div style="flex:1;">
+                {toc_html}
+            </div>
+            <div style="flex:1;">
+                <video id="player" width="480" controls>
+                    <source src="{video_URL}" type="video/mp4">
+                </video>
+            </div>
+        </div>
+ + """ + + return full_html + + From 9114d56023590789a1359ca9c6a6798de6c93da4 Mon Sep 17 00:00:00 2001 From: Gablans Date: Wed, 13 Aug 2025 19:53:03 +0000 Subject: [PATCH 2/9] Small analyzer update to follow structure --- analyzer_templates/video_chapters_structured.json | 2 +- notebooks/video_chapters_structured.ipynb | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/analyzer_templates/video_chapters_structured.json b/analyzer_templates/video_chapters_structured.json index dd844bb..5503371 100644 --- a/analyzer_templates/video_chapters_structured.json +++ b/analyzer_templates/video_chapters_structured.json @@ -8,7 +8,7 @@ "disableFaceBlurring": true, "disableContentFiltering": true, "segmentationMode": "custom", - "segmentationDefinition": "Segment the video into only three chapter types: 'Topic Introduction', 'Details About the Work Done', and 'Outcome, Conclusion and Results'. Reason about the content and determine the best time to segment the video according to these chapter types. Use the timestamp of each image to identify the start and end time of each chapter, and avoid chapter overlap.", + "segmentationDefinition": "Segment the video into only three chapter types: 'Topic Introduction', 'Details About the Work Done', and 'Outcome, Conclusion and Results'. Reason about the content and determine the best time to segment the video according to these chapter types. Use the timestamp of each image to identify the start and end time of each chapter, and avoid chapter overlap. You must always define the three chapter types, and each chapter must have at least one scene.", "locales": [ "en-US" ] diff --git a/notebooks/video_chapters_structured.ipynb b/notebooks/video_chapters_structured.ipynb index aa15cf5..a80d9d3 100644 --- a/notebooks/video_chapters_structured.ipynb +++ b/notebooks/video_chapters_structured.ipynb @@ -83,16 +83,11 @@ "outputs": [], "source": [ "\n", - "## This small routine checks Azure CLI login status\n", + "## Authehticate if you are running this notebook for the first time.\n", "\n", "import subprocess\n", "\n", - "try:\n", - " subprocess.run(\"az account show\", check=True, shell=True)\n", - " print(\"Already logged in to Azure CLI.\")\n", - "except subprocess.CalledProcessError:\n", - " print(\"Not logged in. Please log in now...\")\n", - " subprocess.run(\"az login\", shell=True)\n" + "subprocess.run(\"az login\", shell=True)\n" ] }, { From fddb748e3c27cf58d01907f17342823dc9a1da3a Mon Sep 17 00:00:00 2001 From: Gablans Date: Wed, 13 Aug 2025 22:52:01 +0000 Subject: [PATCH 3/9] Minor copy Edits --- notebooks/video_chapters_structured.ipynb | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/notebooks/video_chapters_structured.ipynb b/notebooks/video_chapters_structured.ipynb index a80d9d3..7151a8e 100644 --- a/notebooks/video_chapters_structured.ipynb +++ b/notebooks/video_chapters_structured.ipynb @@ -5,7 +5,7 @@ "id": "11049ef0", "metadata": {}, "source": [ - "# Video Chapters Generation" + "# Structured Video Chapters Generation" ] }, { @@ -13,7 +13,7 @@ "id": "beccbe11", "metadata": {}, "source": [ - "Generate video chapters based on Azure Content Understanding and Azure OpenAI." + "Generate chapters for a video that follow a defined structure using Azure Content Understanding and Azure OpenAI." ] }, { @@ -97,7 +97,7 @@ "source": [ "## File to Analyze\n", "\n", - "Use the following variable to define what file to analyze. For this example, we will be examining a small tutorial." 
+ "Use the following variable to define what video file to analyze. For this example, we will be examining a small tutorial." ] }, { @@ -171,7 +171,7 @@ "metadata": {}, "source": [ "### Use the created analyzer to extract video content\n", - "It might take some time depending on the video length. Try with short videos to get results faster" + "It might take some time depending on the video length. Try with short videos to get results faster." ] }, { @@ -195,6 +195,14 @@ "cu_client.delete_analyzer(ANALYZER_ID)" ] }, + { + "cell_type": "markdown", + "id": "0018b59f", + "metadata": {}, + "source": [ + "We will now display the results with the following code:" + ] + }, { "cell_type": "code", "execution_count": null, From e71881c9bbbda34e4825054a93cd616e76801f67 Mon Sep 17 00:00:00 2001 From: Gablans Date: Wed, 13 Aug 2025 23:03:48 +0000 Subject: [PATCH 4/9] Minor copy edit for Title and Intro --- notebooks/video_chapters_structured.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/video_chapters_structured.ipynb b/notebooks/video_chapters_structured.ipynb index 7151a8e..e0a06ae 100644 --- a/notebooks/video_chapters_structured.ipynb +++ b/notebooks/video_chapters_structured.ipynb @@ -5,7 +5,7 @@ "id": "11049ef0", "metadata": {}, "source": [ - "# Structured Video Chapters Generation" + "# Structured Video Chaptering" ] }, { @@ -13,7 +13,7 @@ "id": "beccbe11", "metadata": {}, "source": [ - "Generate chapters for a video that follow a defined structure using Azure Content Understanding and Azure OpenAI." + "This notebook demonstrates how to automatically generate video chapters that follow a defined structure using Azure Content Understanding and GPT. You can define a specific content structure to be followed by the LLM model to segment the video into meaningful chapters and scenes, providing concise descriptions and timestamps." ] }, { From 993ea06a023852448677e8bd1c03960a46642b97 Mon Sep 17 00:00:00 2001 From: Gablans Date: Wed, 13 Aug 2025 23:37:59 +0000 Subject: [PATCH 5/9] Initial Check-in of Dynamic Video Chapters --- .../video_chapters_dynamic.json | 56 ++++ notebooks/video_chapters_dynamic.ipynb | 240 ++++++++++++++++++ 2 files changed, 296 insertions(+) create mode 100644 analyzer_templates/video_chapters_dynamic.json create mode 100644 notebooks/video_chapters_dynamic.ipynb diff --git a/analyzer_templates/video_chapters_dynamic.json b/analyzer_templates/video_chapters_dynamic.json new file mode 100644 index 0000000..18a56f6 --- /dev/null +++ b/analyzer_templates/video_chapters_dynamic.json @@ -0,0 +1,56 @@ +{ + "description": "test model", + "scenario": "videoShot", + "config": { + "returnDetails": true, + "enableSegmentation": true, + "disableContentFiltering": true, + "segmentationMode": "custom", + "segmentationDefinition": "Segment the video into stories or chapters. A story (chapter) in a video is a self-contained portion of the program dedicated to a specific news story, topic, or theme. 
Each segment typically includes a distinct introduction, development, and (sometimes) a conclusion, and can feature a combination of elements such as reporter narration, interviews, sound bites, relevant footage (B-roll), and graphics.", + "locales": [ + "en-US" + ] + }, + "BaseAnalyzerId": "prebuilt-videoAnalyzer", + "fieldSchema": { + "name": "Content Understanding", + "description": "NO USE", + "fields": { + "Segments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "SegmentId": { "type": "string" }, + "SegmentType": { + "type": "string", + "method": "generate", + "description": "The short title or a short summary of the story or chapter." + }, + "Scenes": { + "type": "array", + "items": { + "type": "object", + "properties": { + "Description": { + "type": "string", + "method": "generate", + "description": "A 5 word description of the scene. A scene is a smaller segment of the segment where a continous block for storytelling unfolds within a specific time, place, and set of characters. A scene can only belong to a single chapter, and cannot overlap with other scenes. Scenes are sequential across the video." + }, + "StartTimestamp": { + "type": "string", + "description": "the start timestamp of the scene" + }, + "EndTimestamp": { + "type": "string", + "description": "the end timestamp of the scene" + } + } + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/notebooks/video_chapters_dynamic.ipynb b/notebooks/video_chapters_dynamic.ipynb new file mode 100644 index 0000000..5d2bc11 --- /dev/null +++ b/notebooks/video_chapters_dynamic.ipynb @@ -0,0 +1,240 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "11049ef0", + "metadata": {}, + "source": [ + "# Dynamic Video Chaptering" + ] + }, + { + "cell_type": "markdown", + "id": "beccbe11", + "metadata": {}, + "source": [ + "This notebook demonstrates how to automatically generate video chapters using Azure Content Understanding. By analyzing the video content, the LLM model applies its own reasoning to segment the video into meaningful chapters and scenes, providing concise descriptions and timestamps. This approach enables users to quickly create a structured table of contents for any video, powered by advanced AI understanding." + ] + }, + { + "cell_type": "markdown", + "id": "0a44bdf4", + "metadata": {}, + "source": [ + "\n", + "## Pre-requisites\n", + "1. Follow [README](../README.md#configure-azure-ai-service-resource) to create essential resource that will be used in this sample.\n", + "1. 
Install required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3dfa60be", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -r ../requirements.txt" + ] + }, + { + "cell_type": "markdown", + "id": "fcefeaab", + "metadata": {}, + "source": [ + "## Load environment variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c69047b", + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "import os\n", + "\n", + "load_dotenv(dotenv_path=\".env\", override=True)\n", + "\n", + "AZURE_AI_SERVICE_ENDPOINT = os.getenv(\"AZURE_AI_SERVICE_ENDPOINT\")\n", + "AZURE_AI_SERVICE_API_VERSION = os.getenv(\"AZURE_AI_SERVICE_API_VERSION\", \"2025-05-01-preview\")\n", + "\n", + "AZURE_OPENAI_ENDPOINT = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n", + "AZURE_OPENAI_API_VERSION = os.getenv(\"AZURE_OPENAI_API_VERSION\", \"2024-08-01-preview\")\n", + "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME = os.getenv(\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\")" + ] + }, + { + "cell_type": "markdown", + "id": "7fe969de", + "metadata": {}, + "source": [ + "If you haven't done so, please authenticate by running **'az login'** through the terminal. This credentials are used to validate that you have access to the resources you defined above.\n", + "\n", + "Make sure you have Azure CLI installed on your system. To install --> curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ea18419", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "## Authehticate if you are running this notebook for the first time.\n", + "\n", + "import subprocess\n", + "\n", + "subprocess.run(\"az login\", shell=True)\n" + ] + }, + { + "cell_type": "markdown", + "id": "78e7d414", + "metadata": {}, + "source": [ + "## File to Analyze\n", + "\n", + "Use the following variable to define what file to analyze. For this example, we will be examining a small tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c9fb2b0", + "metadata": {}, + "outputs": [], + "source": [ + "VIDEO_LOCATION = \"https://github.com/Azure-Samples/azure-ai-content-understanding-assets/raw/refs/heads/main/videos/learning/learning2.mp4\"" + ] + }, + { + "cell_type": "markdown", + "id": "57b9abf6", + "metadata": {}, + "source": [ + "## Create a custom analyzer and submit the video to generate chapters\n", + "The custom analyzer schema for this notebook is [video_chapters_dynamic.json](../analyzer_templates/video_chapters_dynamic.json). This file defines the schema and configuration for a custom video analyzer. It specifies how a video should be segmented into chapters and scenes, including three chapter types: \"Topic Introduction\", \"Details About the Work Done\", and \"Conclusion or Results\". Each segment contains a list of scenes, with each scene described by a short description, start timestamp, and end timestamp. The configuration section controls segmentation behavior and other analysis options, while the fieldSchema section outlines the expected structure of the output, ensuring chapters and scenes are clearly organized and non-overlapping.\n", + "\n", + "In this example, we will use the utility class `AzureContentUnderstandingClient` to load the analyzer schema from the template file and submit it to Azure Content Understanding service. 
Then, we will analyze the video and generate the desired chapter and scene structure.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40e52230", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from pathlib import Path\n", + "import json\n", + "import uuid\n", + "\n", + "\n", + "# add the parent directory to the path to use shared modules\n", + "parent_dir = Path(Path.cwd()).parent\n", + "sys.path.append(\n", + " str(parent_dir)\n", + ")\n", + "from python.content_understanding_client import AzureContentUnderstandingClient\n", + "\n", + "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n", + "credential = DefaultAzureCredential()\n", + "token_provider = get_bearer_token_provider(credential, \"https://cognitiveservices.azure.com/.default\")\n", + "\n", + "# The analyzer template is used to define the schema of the output\n", + "ANALYZER_TEMPLATE_PATH = \"../analyzer_templates/video_chapters_dynamic.json\"\n", + "ANALYZER_ID = \"video_scene_chapter\" + \"_\" + str(uuid.uuid4()) # Unique identifier for the analyzer\n", + "\n", + "# Create the Content Understanding (CU) client\n", + "cu_client = AzureContentUnderstandingClient(\n", + " endpoint=AZURE_AI_SERVICE_ENDPOINT,\n", + " api_version=AZURE_AI_SERVICE_API_VERSION,\n", + " token_provider=token_provider,\n", + " x_ms_useragent=\"azure-ai-content-understanding-python/video_chapters_dynamic\", # This header is used for sample usage telemetry, please comment out this line if you want to opt out.\n", + ")\n", + "\n", + "# Use the client to create an analyzer\n", + "response = cu_client.begin_create_analyzer(\n", + " ANALYZER_ID, analyzer_template_path=ANALYZER_TEMPLATE_PATH)\n", + "result = cu_client.poll_result(response)\n", + "\n", + "print(json.dumps(result, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "9b85cf38", + "metadata": {}, + "source": [ + "### Use the created analyzer to extract video content\n", + "It might take some time depending on the video length. 
Try with short videos to get results faster" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad31ab6a", + "metadata": {}, + "outputs": [], + "source": [ + "# Submit the video for content analysis\n", + "response = cu_client.begin_analyze(ANALYZER_ID, file_location=VIDEO_LOCATION)\n", + "\n", + "# Wait for the analysis to complete and get the content analysis result\n", + "video_cu_result = cu_client.poll_result(\n", + " response, timeout_seconds=3600) # 1 hour timeout for long videos\n", + "\n", + "# Print the content analysis result\n", + "print(f\"Video Content Understanding result: \", video_cu_result)\n", + "\n", + "# Optional - Delete the analyzer if it is no longer needed\n", + "cu_client.delete_analyzer(ANALYZER_ID)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55d1342f", + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import display, HTML\n", + "from python.chapters_utility import ChaptersFormatter\n", + "\n", + "full_html = ChaptersFormatter.format_chapters_output(VIDEO_LOCATION, video_cu_result)\n", + "display(HTML(full_html))" + ] + }, + { + "cell_type": "markdown", + "id": "1126b263", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From d1537fc204b577c8d8fb058336c6c86bbcbbab47 Mon Sep 17 00:00:00 2001 From: Gablans Date: Fri, 15 Aug 2025 17:24:27 +0000 Subject: [PATCH 6/9] Merging Dynamic and Structured into single book --- ...ic.ipynb => video_chapters_creation.ipynb} | 147 ++++++++++- notebooks/video_chapters_structured.ipynb | 240 ------------------ 2 files changed, 140 insertions(+), 247 deletions(-) rename notebooks/{video_chapters_dynamic.ipynb => video_chapters_creation.ipynb} (52%) delete mode 100644 notebooks/video_chapters_structured.ipynb diff --git a/notebooks/video_chapters_dynamic.ipynb b/notebooks/video_chapters_creation.ipynb similarity index 52% rename from notebooks/video_chapters_dynamic.ipynb rename to notebooks/video_chapters_creation.ipynb index 5d2bc11..6ff1015 100644 --- a/notebooks/video_chapters_dynamic.ipynb +++ b/notebooks/video_chapters_creation.ipynb @@ -5,7 +5,7 @@ "id": "11049ef0", "metadata": {}, "source": [ - "# Dynamic Video Chaptering" + "# Generating Video Chapters" ] }, { @@ -97,7 +97,7 @@ "source": [ "## File to Analyze\n", "\n", - "Use the following variable to define what file to analyze. For this example, we will be examining a small tutorial." + "Use the following variable to define what file to analyze. For this example, we will be examining a small tutorial. If you would like to try with your own videos, you could use a small video to get results faster." 
] }, { @@ -110,13 +110,23 @@ "VIDEO_LOCATION = \"https://github.com/Azure-Samples/azure-ai-content-understanding-assets/raw/refs/heads/main/videos/learning/learning2.mp4\"" ] }, + { + "cell_type": "markdown", + "id": "19db5e47", + "metadata": {}, + "source": [ + "# Automated Chapter Discovery\n", + "\n", + "We will first use Content Understanding and let service interact with an LLM model to apply its own reasoning to segment the video into meaningful chapters, providing concise descriptions and timestamps." + ] + }, { "cell_type": "markdown", "id": "57b9abf6", "metadata": {}, "source": [ - "## Create a custom analyzer and submit the video to generate chapters\n", - "The custom analyzer schema for this notebook is [video_chapters_dynamic.json](../analyzer_templates/video_chapters_dynamic.json). This file defines the schema and configuration for a custom video analyzer. It specifies how a video should be segmented into chapters and scenes, including three chapter types: \"Topic Introduction\", \"Details About the Work Done\", and \"Conclusion or Results\". Each segment contains a list of scenes, with each scene described by a short description, start timestamp, and end timestamp. The configuration section controls segmentation behavior and other analysis options, while the fieldSchema section outlines the expected structure of the output, ensuring chapters and scenes are clearly organized and non-overlapping.\n", + "## Create a Custom Analyzer\n", + "The custom analyzer schema for this notebook is [video_chapters_dynamic.json](../analyzer_templates/video_chapters_dynamic.json). This file defines the schema and configuration for a custom video analyzer that uses AI to dynamically generate chapters and scenes based on its understanding of the video content. The service analyzes the video and determines the most meaningful chapter divisions, providing start and end timestamps for each. The configuration section allows flexible segmentation, enabling the model to reason about the content and organize chapters in a way that best fits.\n", "\n", "In this example, we will use the utility class `AzureContentUnderstandingClient` to load the analyzer schema from the template file and submit it to Azure Content Understanding service. Then, we will analyze the video and generate the desired chapter and scene structure.\n" ] @@ -171,7 +181,7 @@ "metadata": {}, "source": [ "### Use the created analyzer to extract video content\n", - "It might take some time depending on the video length. Try with short videos to get results faster" + "With the analyzer created, request video content extraction using content understanding." ] }, { @@ -195,6 +205,14 @@ "cu_client.delete_analyzer(ANALYZER_ID)" ] }, + { + "cell_type": "markdown", + "id": "afde825e", + "metadata": {}, + "source": [ + "Use the following utility to display results for visual review." + ] + }, { "cell_type": "code", "execution_count": null, @@ -211,9 +229,124 @@ }, { "cell_type": "markdown", - "id": "1126b263", + "id": "985e9193", + "metadata": {}, + "source": [ + "# Structured Chapter Creation\n", + "\n", + "Alternatively, you can define a specific structure for chaptering that Content Understanding will use to guide the model. 
When your request includes information detailing clear chapter types and organization, the service can instruct the model to segment the video according to your desired structure, ensuring consistent and predictable chapter creation.\n", + "\n", + "## Create a Custom Analyzer\n", + "The custom analyzer schema for this notebook is [video_chapters_structured.json](../analyzer_templates/video_chapters_structured.json). This file defines the schema and configuration for a custom video analyzer. In this example, it specifies how a video should be segmented into chapters and scenes, including three chapter types: \"Topic Introduction\", \"Details About the Work Done\", and \"Conclusion or Results\". Each segment contains a list of scenes, with each scene described by a short description, start timestamp, and end timestamp. The configuration section controls segmentation behavior and other analysis options, while the fieldSchema section outlines the expected structure of the output, ensuring chapters and scenes are clearly organized and non-overlapping.\n", + "\n", + "We we will also be using the utility class `AzureContentUnderstandingClient` to load the analyzer schema from the template file and submit it to Azure Content Understanding service. Then, we will analyze the video and generate the desired chapter and scene structure." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0afa000b", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from pathlib import Path\n", + "import json\n", + "import uuid\n", + "\n", + "\n", + "# add the parent directory to the path to use shared modules\n", + "parent_dir = Path(Path.cwd()).parent\n", + "sys.path.append(\n", + " str(parent_dir)\n", + ")\n", + "from python.content_understanding_client import AzureContentUnderstandingClient\n", + "\n", + "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n", + "credential = DefaultAzureCredential()\n", + "token_provider = get_bearer_token_provider(credential, \"https://cognitiveservices.azure.com/.default\")\n", + "\n", + "# The analyzer template is used to define the schema of the output\n", + "ANALYZER_TEMPLATE_PATH = \"../analyzer_templates/video_chapters_structured.json\"\n", + "ANALYZER_ID = \"video_scene_chapter\" + \"_\" + str(uuid.uuid4()) # Unique identifier for the analyzer\n", + "\n", + "# Create the Content Understanding (CU) client\n", + "cu_client = AzureContentUnderstandingClient(\n", + " endpoint=AZURE_AI_SERVICE_ENDPOINT,\n", + " api_version=AZURE_AI_SERVICE_API_VERSION,\n", + " token_provider=token_provider,\n", + " x_ms_useragent=\"azure-ai-content-understanding-python/video_chapters_structured\", # This header is used for sample usage telemetry, please comment out this line if you want to opt out.\n", + ")\n", + "\n", + "# Use the client to create an analyzer\n", + "response = cu_client.begin_create_analyzer(\n", + " ANALYZER_ID, analyzer_template_path=ANALYZER_TEMPLATE_PATH)\n", + "result = cu_client.poll_result(response)\n", + "\n", + "print(json.dumps(result, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "5b9cac5c", + "metadata": {}, + "source": [ + "### Use the created analyzer to extract video content\n", + "It might take some time depending on the video length. 
Try with short videos to get results faster" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f416bf7", + "metadata": {}, + "outputs": [], + "source": [ + "# Submit the video for content analysis\n", + "response = cu_client.begin_analyze(ANALYZER_ID, file_location=VIDEO_LOCATION)\n", + "\n", + "# Wait for the analysis to complete and get the content analysis result\n", + "video_cu_result = cu_client.poll_result(\n", + " response, timeout_seconds=3600) # 1 hour timeout for long videos\n", + "\n", + "# Print the content analysis result\n", + "print(f\"Video Content Understanding result: \", video_cu_result)\n", + "\n", + "# Optional - Delete the analyzer if it is no longer needed\n", + "cu_client.delete_analyzer(ANALYZER_ID)" + ] + }, + { + "cell_type": "markdown", + "id": "0200883b", + "metadata": {}, + "source": [ + "Use the following utility to display results for visual review." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c744700", + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import display, HTML\n", + "from python.chapters_utility import ChaptersFormatter\n", + "\n", + "full_html = ChaptersFormatter.format_chapters_output(VIDEO_LOCATION, video_cu_result)\n", + "display(HTML(full_html))" + ] + }, + { + "cell_type": "markdown", + "id": "54f8d5e2", "metadata": {}, - "source": [] + "source": [ + "# Conclusion\n", + "\n", + "This notebook has demonstrated how Azure Content Understanding, combined with advanced language models, can automatically generate meaningful chapters and scenes from video content. By leveraging AI-driven analysis, you can quickly create structured, searchable tables of contents for any video, making it easier to navigate and understand complex material. This approach streamlines video processing and enables new possibilities for content organization and discovery." + ] } ], "metadata": { diff --git a/notebooks/video_chapters_structured.ipynb b/notebooks/video_chapters_structured.ipynb deleted file mode 100644 index a80d9d3..0000000 --- a/notebooks/video_chapters_structured.ipynb +++ /dev/null @@ -1,240 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "11049ef0", - "metadata": {}, - "source": [ - "# Video Chapters Generation" - ] - }, - { - "cell_type": "markdown", - "id": "beccbe11", - "metadata": {}, - "source": [ - "Generate video chapters based on Azure Content Understanding and Azure OpenAI." - ] - }, - { - "cell_type": "markdown", - "id": "0a44bdf4", - "metadata": {}, - "source": [ - "\n", - "## Pre-requisites\n", - "1. Follow [README](../README.md#configure-azure-ai-service-resource) to create essential resource that will be used in this sample.\n", - "1. 
Install required packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3dfa60be", - "metadata": {}, - "outputs": [], - "source": [ - "%pip install -r ../requirements.txt" - ] - }, - { - "cell_type": "markdown", - "id": "fcefeaab", - "metadata": {}, - "source": [ - "## Load environment variables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c69047b", - "metadata": {}, - "outputs": [], - "source": [ - "from dotenv import load_dotenv\n", - "import os\n", - "\n", - "load_dotenv(dotenv_path=\".env\", override=True)\n", - "\n", - "AZURE_AI_SERVICE_ENDPOINT = os.getenv(\"AZURE_AI_SERVICE_ENDPOINT\")\n", - "AZURE_AI_SERVICE_API_VERSION = os.getenv(\"AZURE_AI_SERVICE_API_VERSION\", \"2025-05-01-preview\")\n", - "\n", - "AZURE_OPENAI_ENDPOINT = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n", - "AZURE_OPENAI_API_VERSION = os.getenv(\"AZURE_OPENAI_API_VERSION\", \"2024-08-01-preview\")\n", - "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME = os.getenv(\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\")" - ] - }, - { - "cell_type": "markdown", - "id": "7fe969de", - "metadata": {}, - "source": [ - "If you haven't done so, please authenticate by running **'az login'** through the terminal. This credentials are used to validate that you have access to the resources you defined above.\n", - "\n", - "Make sure you have Azure CLI installed on your system. To install --> curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7ea18419", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "## Authehticate if you are running this notebook for the first time.\n", - "\n", - "import subprocess\n", - "\n", - "subprocess.run(\"az login\", shell=True)\n" - ] - }, - { - "cell_type": "markdown", - "id": "78e7d414", - "metadata": {}, - "source": [ - "## File to Analyze\n", - "\n", - "Use the following variable to define what file to analyze. For this example, we will be examining a small tutorial." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c9fb2b0", - "metadata": {}, - "outputs": [], - "source": [ - "VIDEO_LOCATION = \"https://github.com/Azure-Samples/azure-ai-content-understanding-assets/raw/refs/heads/main/videos/learning/learning2.mp4\"" - ] - }, - { - "cell_type": "markdown", - "id": "57b9abf6", - "metadata": {}, - "source": [ - "## Create a custom analyzer and submit the video to generate chapters\n", - "The custom analyzer schema for this notebook is [video_chapters_structured.json](../analyzer_templates/video_chapters_structured.json). This file defines the schema and configuration for a custom video analyzer. It specifies how a video should be segmented into chapters and scenes, including three chapter types: \"Topic Introduction\", \"Details About the Work Done\", and \"Conclusion or Results\". Each segment contains a list of scenes, with each scene described by a short description, start timestamp, and end timestamp. The configuration section controls segmentation behavior and other analysis options, while the fieldSchema section outlines the expected structure of the output, ensuring chapters and scenes are clearly organized and non-overlapping.\n", - "\n", - "In this example, we will use the utility class `AzureContentUnderstandingClient` to load the analyzer schema from the template file and submit it to Azure Content Understanding service. 
Then, we will analyze the video and generate the desired chapter and scene structure.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "40e52230", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "from pathlib import Path\n", - "import json\n", - "import uuid\n", - "\n", - "\n", - "# add the parent directory to the path to use shared modules\n", - "parent_dir = Path(Path.cwd()).parent\n", - "sys.path.append(\n", - " str(parent_dir)\n", - ")\n", - "from python.content_understanding_client import AzureContentUnderstandingClient\n", - "\n", - "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n", - "credential = DefaultAzureCredential()\n", - "token_provider = get_bearer_token_provider(credential, \"https://cognitiveservices.azure.com/.default\")\n", - "\n", - "# The analyzer template is used to define the schema of the output\n", - "ANALYZER_TEMPLATE_PATH = \"../analyzer_templates/video_chapters_structured.json\"\n", - "ANALYZER_ID = \"video_scene_chapter\" + \"_\" + str(uuid.uuid4()) # Unique identifier for the analyzer\n", - "\n", - "# Create the Content Understanding (CU) client\n", - "cu_client = AzureContentUnderstandingClient(\n", - " endpoint=AZURE_AI_SERVICE_ENDPOINT,\n", - " api_version=AZURE_AI_SERVICE_API_VERSION,\n", - " token_provider=token_provider,\n", - " x_ms_useragent=\"azure-ai-content-understanding-python/video_chapters_structured\", # This header is used for sample usage telemetry, please comment out this line if you want to opt out.\n", - ")\n", - "\n", - "# Use the client to create an analyzer\n", - "response = cu_client.begin_create_analyzer(\n", - " ANALYZER_ID, analyzer_template_path=ANALYZER_TEMPLATE_PATH)\n", - "result = cu_client.poll_result(response)\n", - "\n", - "print(json.dumps(result, indent=2))" - ] - }, - { - "cell_type": "markdown", - "id": "9b85cf38", - "metadata": {}, - "source": [ - "### Use the created analyzer to extract video content\n", - "It might take some time depending on the video length. 
Try with short videos to get results faster" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ad31ab6a", - "metadata": {}, - "outputs": [], - "source": [ - "# Submit the video for content analysis\n", - "response = cu_client.begin_analyze(ANALYZER_ID, file_location=VIDEO_LOCATION)\n", - "\n", - "# Wait for the analysis to complete and get the content analysis result\n", - "video_cu_result = cu_client.poll_result(\n", - " response, timeout_seconds=3600) # 1 hour timeout for long videos\n", - "\n", - "# Print the content analysis result\n", - "print(f\"Video Content Understanding result: \", video_cu_result)\n", - "\n", - "# Optional - Delete the analyzer if it is no longer needed\n", - "cu_client.delete_analyzer(ANALYZER_ID)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55d1342f", - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import display, HTML\n", - "from python.chapters_utility import ChaptersFormatter\n", - "\n", - "full_html = ChaptersFormatter.format_chapters_output(VIDEO_LOCATION, video_cu_result)\n", - "display(HTML(full_html))" - ] - }, - { - "cell_type": "markdown", - "id": "1126b263", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 8ef9a9ba3b6f211a5e175ceb5f2d48906aefd5bd Mon Sep 17 00:00:00 2001 From: Gablans Date: Fri, 15 Aug 2025 17:34:14 +0000 Subject: [PATCH 7/9] Fixing PR comments to clarify request --- analyzer_templates/video_chapters_dynamic.json | 8 +++----- analyzer_templates/video_chapters_structured.json | 10 +++------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/analyzer_templates/video_chapters_dynamic.json b/analyzer_templates/video_chapters_dynamic.json index 18a56f6..8097a98 100644 --- a/analyzer_templates/video_chapters_dynamic.json +++ b/analyzer_templates/video_chapters_dynamic.json @@ -1,10 +1,9 @@ { - "description": "test model", + "description": "Dynamic Chaptering", "scenario": "videoShot", "config": { "returnDetails": true, "enableSegmentation": true, - "disableContentFiltering": true, "segmentationMode": "custom", "segmentationDefinition": "Segment the video into stories or chapters. A story (chapter) in a video is a self-contained portion of the program dedicated to a specific news story, topic, or theme. Each segment typically includes a distinct introduction, development, and (sometimes) a conclusion, and can feature a combination of elements such as reporter narration, interviews, sound bites, relevant footage (B-roll), and graphics.", "locales": [ @@ -13,8 +12,7 @@ }, "BaseAnalyzerId": "prebuilt-videoAnalyzer", "fieldSchema": { - "name": "Content Understanding", - "description": "NO USE", + "name": "Content Understanding - Dynamic Chaptering", "fields": { "Segments": { "type": "array", @@ -35,7 +33,7 @@ "Description": { "type": "string", "method": "generate", - "description": "A 5 word description of the scene. A scene is a smaller segment of the segment where a continous block for storytelling unfolds within a specific time, place, and set of characters. A scene can only belong to a single chapter, and cannot overlap with other scenes. 
Scenes are sequential across the video." + "description": "A five-word description of the scene. A scene is a smaller segment of the segment where a continous block for storytelling unfolds within a specific time, place, and set of characters. A scene can only belong to a single chapter, and cannot overlap with other scenes. Scenes are sequential across the video." }, "StartTimestamp": { "type": "string", diff --git a/analyzer_templates/video_chapters_structured.json b/analyzer_templates/video_chapters_structured.json index 5503371..fc8dc87 100644 --- a/analyzer_templates/video_chapters_structured.json +++ b/analyzer_templates/video_chapters_structured.json @@ -1,12 +1,9 @@ { - "description": "test model", + "description": "Structured Chaptering", "scenario": "videoShot", "config": { "returnDetails": true, "enableSegmentation": true, - "enableFace": false, - "disableFaceBlurring": true, - "disableContentFiltering": true, "segmentationMode": "custom", "segmentationDefinition": "Segment the video into only three chapter types: 'Topic Introduction', 'Details About the Work Done', and 'Outcome, Conclusion and Results'. Reason about the content and determine the best time to segment the video according to these chapter types. Use the timestamp of each image to identify the start and end time of each chapter, and avoid chapter overlap. You must always define the three chapter types, and each chapter must have at least one scene.", "locales": [ @@ -15,8 +12,7 @@ }, "BaseAnalyzerId": "prebuilt-videoAnalyzer", "fieldSchema": { - "name": "Content Understanding", - "description": "NO USE", + "name": "Content Understanding - Structured Chaptering", "fields": { "Segments": { "type": "array", @@ -37,7 +33,7 @@ "Description": { "type": "string", "method": "generate", - "description": "A 5 word description of the scene. A scene is a smaller segment of the segment where a continous block for storytelling unfolds within a specific time, place, and set of characters. A scene can only belong to a single chapter, and cannot overlap with other scenes. Scenes are sequential across the video." + "description": "A five-word description of the scene. A scene is a smaller segment of the segment where a continous block for storytelling unfolds within a specific time, place, and set of characters. A scene can only belong to a single chapter, and cannot overlap with other scenes. Scenes are sequential across the video." 
}, "StartTimestamp": { "type": "string", From 46efebbe503173d8df2bb1c77dd4e20e09f58f76 Mon Sep 17 00:00:00 2001 From: Gablans Date: Fri, 15 Aug 2025 19:20:09 +0000 Subject: [PATCH 8/9] Adjusting Chaptering Utility to match Type --- notebooks/video_chapters_creation.ipynb | 4 +- python/chapters_utility.py | 104 ++++++++++++++++++------ 2 files changed, 80 insertions(+), 28 deletions(-) diff --git a/notebooks/video_chapters_creation.ipynb b/notebooks/video_chapters_creation.ipynb index 6ff1015..f0950f1 100644 --- a/notebooks/video_chapters_creation.ipynb +++ b/notebooks/video_chapters_creation.ipynb @@ -223,7 +223,8 @@ "from IPython.display import display, HTML\n", "from python.chapters_utility import ChaptersFormatter\n", "\n", - "full_html = ChaptersFormatter.format_chapters_output(VIDEO_LOCATION, video_cu_result)\n", + "# For dynamic chaptering, pass dynamic=True\n", + "full_html = ChaptersFormatter.format_chapters_output(VIDEO_LOCATION, video_cu_result, dynamic=True)\n", "display(HTML(full_html))" ] }, @@ -334,6 +335,7 @@ "from IPython.display import display, HTML\n", "from python.chapters_utility import ChaptersFormatter\n", "\n", + "# For structured chaptering, use default (dynamic=False)\n", "full_html = ChaptersFormatter.format_chapters_output(VIDEO_LOCATION, video_cu_result)\n", "display(HTML(full_html))" ] diff --git a/python/chapters_utility.py b/python/chapters_utility.py index cbf8cec..3ba76f4 100644 --- a/python/chapters_utility.py +++ b/python/chapters_utility.py @@ -2,38 +2,88 @@ class ChaptersFormatter: """Formating Utility for Table of Contents""" - def format_chapters_output(video_URL, video_cu_result): + def format_chapters_output(video_URL, video_cu_result, dynamic=False): """Formats the chapters output for the video.""" - segments = ( - video_cu_result - .get("result", {}) - .get("contents", [])[0] - .get("fields", {}) - .get("Segments", {}) - .get("valueArray", []) - ) - toc_html = "
<div style='font-family: Segoe UI, Arial, sans-serif;'>"
         toc_html += "<h2>Table of Contents</h2><ul>"
-        for idx, segment in enumerate(segments):
-            seg_obj = segment.get("valueObject", {})
-            seg_type = seg_obj.get("SegmentType", {}).get("valueString", "Unknown")
-            toc_html += f"<li><b>{seg_type}</b><ul>"
-            scenes = seg_obj.get("Scenes", {}).get("valueArray", [])
-            for sidx, scene in enumerate(scenes):
-                scene_obj = scene.get("valueObject", {})
-                desc = scene_obj.get("Description", {}).get("valueString", "No description")
-                start = scene_obj.get("StartTimestamp", {}).get("valueString", "N/A")
-                h, m, s = [float(x) if '.' in x else int(x) for x in start.split(':')]
-                seconds = int(h) * 3600 + int(m) * 60 + float(s)
-                toc_html += (
-                    f"<li><a href='javascript:void(0)' onclick=\"document.getElementById('player').currentTime={seconds};"
-                    f"document.getElementById('player').play();\">{desc} ({start})</a></li>"
-                )
-            toc_html += "</ul></li>"
-        toc_html += "</ul></div>"
+        if dynamic:
+            # Try to use chapters/scenes if present, else fallback to Segments
+            chapters = video_cu_result.get("chapters")
+            scenes = video_cu_result.get("scenes")
+            if chapters and scenes:
+                scene_lookup = {scene.get("scene_id"): scene for scene in scenes}
+                for chapter in chapters:
+                    title = chapter.get("title", "Unknown Chapter")
+                    toc_html += f"<li><b>{title}</b><ul>"
+                    for scene_id in chapter.get("scene_ids", []):
+                        scene = scene_lookup.get(scene_id, {})
+                        desc = scene.get("description", "No description")
+                        start = scene.get("start_timestamp", "N/A")
+                        h, m, s = [float(x) if '.' in x else int(x) for x in start.split(':')]
+                        seconds = int(h) * 3600 + int(m) * 60 + float(s)
+                        toc_html += (
+                            f"<li><a href='javascript:void(0)' onclick=\"document.getElementById('player').currentTime={seconds};"
+                            f"document.getElementById('player').play();\">{desc} ({start})</a></li>"
+                        )
+                    toc_html += "</ul></li>"
+            else:
+                # Fallback to Segments structure
+                segments = (
+                    video_cu_result
+                    .get("result", {})
+                    .get("contents", [])[0]
+                    .get("fields", {})
+                    .get("Segments", {})
+                    .get("valueArray", [])
+                )
+                for idx, segment in enumerate(segments):
+                    seg_obj = segment.get("valueObject", {})
+                    seg_type = seg_obj.get("SegmentType", {}).get("valueString", "Unknown")
+                    toc_html += f"<li><b>{seg_type}</b><ul>"
+                    scenes = seg_obj.get("Scenes", {}).get("valueArray", [])
+                    for sidx, scene in enumerate(scenes):
+                        scene_obj = scene.get("valueObject", {})
+                        desc = scene_obj.get("Description", {}).get("valueString", "No description")
+                        start = scene_obj.get("StartTimestamp", {}).get("valueString", "N/A")
+                        h, m, s = [float(x) if '.' in x else int(x) for x in start.split(':')]
+                        seconds = int(h) * 3600 + int(m) * 60 + float(s)
+                        toc_html += (
+                            f"<li><a href='javascript:void(0)' onclick=\"document.getElementById('player').currentTime={seconds};"
+                            f"document.getElementById('player').play();\">{desc} ({start})</a></li>"
+                        )
+                    toc_html += "</ul></li>"
+        else:
+            segments = (
+                video_cu_result
+                .get("result", {})
+                .get("contents", [])[0]
+                .get("fields", {})
+                .get("Segments", {})
+                .get("valueArray", [])
+            )
+            for idx, segment in enumerate(segments):
+                seg_obj = segment.get("valueObject", {})
+                seg_type = seg_obj.get("SegmentType", {}).get("valueString", "Unknown")
+                toc_html += f"<li><b>{seg_type}</b><ul>"
+                scenes = seg_obj.get("Scenes", {}).get("valueArray", [])
+                for sidx, scene in enumerate(scenes):
+                    scene_obj = scene.get("valueObject", {})
+                    desc = scene_obj.get("Description", {}).get("valueString", "No description")
+                    start = scene_obj.get("StartTimestamp", {}).get("valueString", "N/A")
+                    h, m, s = [float(x) if '.' in x else int(x) for x in start.split(':')]
+                    seconds = int(h) * 3600 + int(m) * 60 + float(s)
+                    toc_html += (
+                        f"<li><a href='javascript:void(0)' onclick=\"document.getElementById('player').currentTime={seconds};"
+                        f"document.getElementById('player').play();\">{desc} ({start})</a></li>"
+                    )
+                toc_html += "</ul></li>"
+        toc_html += "</ul></div>
" full_html = f""" From e719ec85844b6e17843b032fbab1d8cbf89a29b2 Mon Sep 17 00:00:00 2001 From: Gablans Date: Fri, 15 Aug 2025 19:43:48 +0000 Subject: [PATCH 9/9] Removing unused file after merge --- notebooks/video_chapters_structured.ipynb | 248 ---------------------- 1 file changed, 248 deletions(-) delete mode 100644 notebooks/video_chapters_structured.ipynb diff --git a/notebooks/video_chapters_structured.ipynb b/notebooks/video_chapters_structured.ipynb deleted file mode 100644 index e0a06ae..0000000 --- a/notebooks/video_chapters_structured.ipynb +++ /dev/null @@ -1,248 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "11049ef0", - "metadata": {}, - "source": [ - "# Structured Video Chaptering" - ] - }, - { - "cell_type": "markdown", - "id": "beccbe11", - "metadata": {}, - "source": [ - "This notebook demonstrates how to automatically generate video chapters that follow a defined structure using Azure Content Understanding and GPT. You can define a specific content structure to be followed by the LLM model to segment the video into meaningful chapters and scenes, providing concise descriptions and timestamps." - ] - }, - { - "cell_type": "markdown", - "id": "0a44bdf4", - "metadata": {}, - "source": [ - "\n", - "## Pre-requisites\n", - "1. Follow [README](../README.md#configure-azure-ai-service-resource) to create essential resource that will be used in this sample.\n", - "1. Install required packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3dfa60be", - "metadata": {}, - "outputs": [], - "source": [ - "%pip install -r ../requirements.txt" - ] - }, - { - "cell_type": "markdown", - "id": "fcefeaab", - "metadata": {}, - "source": [ - "## Load environment variables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c69047b", - "metadata": {}, - "outputs": [], - "source": [ - "from dotenv import load_dotenv\n", - "import os\n", - "\n", - "load_dotenv(dotenv_path=\".env\", override=True)\n", - "\n", - "AZURE_AI_SERVICE_ENDPOINT = os.getenv(\"AZURE_AI_SERVICE_ENDPOINT\")\n", - "AZURE_AI_SERVICE_API_VERSION = os.getenv(\"AZURE_AI_SERVICE_API_VERSION\", \"2025-05-01-preview\")\n", - "\n", - "AZURE_OPENAI_ENDPOINT = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n", - "AZURE_OPENAI_API_VERSION = os.getenv(\"AZURE_OPENAI_API_VERSION\", \"2024-08-01-preview\")\n", - "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME = os.getenv(\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\")" - ] - }, - { - "cell_type": "markdown", - "id": "7fe969de", - "metadata": {}, - "source": [ - "If you haven't done so, please authenticate by running **'az login'** through the terminal. This credentials are used to validate that you have access to the resources you defined above.\n", - "\n", - "Make sure you have Azure CLI installed on your system. To install --> curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7ea18419", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "## Authehticate if you are running this notebook for the first time.\n", - "\n", - "import subprocess\n", - "\n", - "subprocess.run(\"az login\", shell=True)\n" - ] - }, - { - "cell_type": "markdown", - "id": "78e7d414", - "metadata": {}, - "source": [ - "## File to Analyze\n", - "\n", - "Use the following variable to define what video file to analyze. For this example, we will be examining a small tutorial." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c9fb2b0", - "metadata": {}, - "outputs": [], - "source": [ - "VIDEO_LOCATION = \"https://github.com/Azure-Samples/azure-ai-content-understanding-assets/raw/refs/heads/main/videos/learning/learning2.mp4\"" - ] - }, - { - "cell_type": "markdown", - "id": "57b9abf6", - "metadata": {}, - "source": [ - "## Create a custom analyzer and submit the video to generate chapters\n", - "The custom analyzer schema for this notebook is [video_chapters_structured.json](../analyzer_templates/video_chapters_structured.json). This file defines the schema and configuration for a custom video analyzer. It specifies how a video should be segmented into chapters and scenes, including three chapter types: \"Topic Introduction\", \"Details About the Work Done\", and \"Conclusion or Results\". Each segment contains a list of scenes, with each scene described by a short description, start timestamp, and end timestamp. The configuration section controls segmentation behavior and other analysis options, while the fieldSchema section outlines the expected structure of the output, ensuring chapters and scenes are clearly organized and non-overlapping.\n", - "\n", - "In this example, we will use the utility class `AzureContentUnderstandingClient` to load the analyzer schema from the template file and submit it to Azure Content Understanding service. Then, we will analyze the video and generate the desired chapter and scene structure.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "40e52230", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "from pathlib import Path\n", - "import json\n", - "import uuid\n", - "\n", - "\n", - "# add the parent directory to the path to use shared modules\n", - "parent_dir = Path(Path.cwd()).parent\n", - "sys.path.append(\n", - " str(parent_dir)\n", - ")\n", - "from python.content_understanding_client import AzureContentUnderstandingClient\n", - "\n", - "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n", - "credential = DefaultAzureCredential()\n", - "token_provider = get_bearer_token_provider(credential, \"https://cognitiveservices.azure.com/.default\")\n", - "\n", - "# The analyzer template is used to define the schema of the output\n", - "ANALYZER_TEMPLATE_PATH = \"../analyzer_templates/video_chapters_structured.json\"\n", - "ANALYZER_ID = \"video_scene_chapter\" + \"_\" + str(uuid.uuid4()) # Unique identifier for the analyzer\n", - "\n", - "# Create the Content Understanding (CU) client\n", - "cu_client = AzureContentUnderstandingClient(\n", - " endpoint=AZURE_AI_SERVICE_ENDPOINT,\n", - " api_version=AZURE_AI_SERVICE_API_VERSION,\n", - " token_provider=token_provider,\n", - " x_ms_useragent=\"azure-ai-content-understanding-python/video_chapters_structured\", # This header is used for sample usage telemetry, please comment out this line if you want to opt out.\n", - ")\n", - "\n", - "# Use the client to create an analyzer\n", - "response = cu_client.begin_create_analyzer(\n", - " ANALYZER_ID, analyzer_template_path=ANALYZER_TEMPLATE_PATH)\n", - "result = cu_client.poll_result(response)\n", - "\n", - "print(json.dumps(result, indent=2))" - ] - }, - { - "cell_type": "markdown", - "id": "9b85cf38", - "metadata": {}, - "source": [ - "### Use the created analyzer to extract video content\n", - "It might take some time depending on the video length. Try with short videos to get results faster." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ad31ab6a", - "metadata": {}, - "outputs": [], - "source": [ - "# Submit the video for content analysis\n", - "response = cu_client.begin_analyze(ANALYZER_ID, file_location=VIDEO_LOCATION)\n", - "\n", - "# Wait for the analysis to complete and get the content analysis result\n", - "video_cu_result = cu_client.poll_result(\n", - " response, timeout_seconds=3600) # 1 hour timeout for long videos\n", - "\n", - "# Print the content analysis result\n", - "print(f\"Video Content Understanding result: \", video_cu_result)\n", - "\n", - "# Optional - Delete the analyzer if it is no longer needed\n", - "cu_client.delete_analyzer(ANALYZER_ID)" - ] - }, - { - "cell_type": "markdown", - "id": "0018b59f", - "metadata": {}, - "source": [ - "We will now display the results with the following code:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55d1342f", - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import display, HTML\n", - "from python.chapters_utility import ChaptersFormatter\n", - "\n", - "full_html = ChaptersFormatter.format_chapters_output(VIDEO_LOCATION, video_cu_result)\n", - "display(HTML(full_html))" - ] - }, - { - "cell_type": "markdown", - "id": "1126b263", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}