In [None]:
#!pip install sqlalchemy psycopg2-binary minio weaviate-client pandas

In [None]:
# Cell 0: Service Endpoints & Credentials
#
# Quick-start cell. Each setting is read from an environment variable of the
# same name when one is set; the literals below are local-development defaults
# only, so real credentials never have to be written into the notebook.

# Example imports
import os
import pandas as pd
from urllib.parse import quote_plus

from sqlalchemy import create_engine
from minio import Minio
import weaviate

# Postgres (for Resolve or your own metadata DB)
PG_HOST = os.getenv("PG_HOST", "localhost")
PG_PORT = int(os.getenv("PG_PORT", "5432"))
PG_DB   = os.getenv("PG_DB", "resolveLibrary")
PG_USER = os.getenv("PG_USER", "davinci")
PG_PASS = os.getenv("PG_PASS", "supersecret")

# MinIO (for object storage)
MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "localhost:9000")
MINIO_ACCESS   = os.getenv("MINIO_ACCESS", "minio")
MINIO_SECRET   = os.getenv("MINIO_SECRET", "minio123")

# Weaviate (vector store)
WEAVIATE_URL   = os.getenv("WEAVIATE_URL", "http://localhost:8080")

# Create clients
# User and password are percent-encoded so characters such as "@", ":" or "/"
# cannot be mistaken for URL delimiters when the DSN is parsed.
engine = create_engine(
    f"postgresql://{quote_plus(PG_USER)}:{quote_plus(PG_PASS)}@{PG_HOST}:{PG_PORT}/{PG_DB}"
)
# secure=False: the local MinIO endpoint is addressed over plain HTTP
minio_client = Minio(MINIO_ENDPOINT, access_key=MINIO_ACCESS, secret_key=MINIO_SECRET, secure=False)
weaviate_client = weaviate.Client(WEAVIATE_URL)

# The summary deliberately omits every secret
print("✅ Clients initialized!",
      f"Postgres: {PG_HOST}:{PG_PORT}/{PG_DB}",
      f"MinIO: {MINIO_ENDPOINT}",
      f"Weaviate: {WEAVIATE_URL}", sep="\n")

# If using `.env` and `requirements.txt`

In [None]:
!pip install --upgrade pip
!pip install -r requirements.txt

In [None]:
# Cell 1: Load environment and initialize clients
import os
from urllib.parse import quote_plus

from dotenv import load_dotenv

# Load .env file
load_dotenv(dotenv_path=".env")


def _require_env(name):
    """Return environment variable `name`; raise RuntimeError if it is not set.

    Failing here gives a clear message instead of letting a missing value be
    formatted into the connection settings as the literal text "None".
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(
            f"Missing required setting {name!r}: define it in .env or the environment"
        )
    return value


# Read into variables
PG_HOST     = _require_env("PG_HOST")
PG_PORT     = int(_require_env("PG_PORT"))
PG_DB       = _require_env("PG_DB")
PG_USER     = _require_env("PG_USER")
PG_PASS     = _require_env("PG_PASS")

MINIO_ENDPOINT = _require_env("MINIO_ENDPOINT")
MINIO_ACCESS   = _require_env("MINIO_ACCESS")
MINIO_SECRET   = _require_env("MINIO_SECRET")
# Optional flag; compared case-insensitively, anything unrecognised means False
MINIO_SECURE   = os.getenv("MINIO_SECURE", "").strip().lower() in {"1", "true", "yes", "on"}

WEAVIATE_URL = _require_env("WEAVIATE_URL")

# Initialize clients
from sqlalchemy import create_engine
from minio import Minio
import weaviate

# Percent-encode user and password so special characters cannot break the URL
engine = create_engine(
    f"postgresql://{quote_plus(PG_USER)}:{quote_plus(PG_PASS)}@{PG_HOST}:{PG_PORT}/{PG_DB}"
)
minio_client = Minio(
    endpoint=MINIO_ENDPOINT,
    access_key=MINIO_ACCESS,
    secret_key=MINIO_SECRET,
    secure=MINIO_SECURE
)
weaviate_client = weaviate.Client(url=WEAVIATE_URL)

# Secrets are intentionally left out of the summary
print("✅ Clients ready:")
print(f" • Postgres: {PG_USER}@{PG_HOST}:{PG_PORT}/{PG_DB}")
print(f" • MinIO: {MINIO_ENDPOINT} (secure={MINIO_SECURE})")
print(f" • Weaviate: {WEAVIATE_URL}")

# Video Batch Processing Notebook Structure

This is how your notebook cells should be organized when the `.ipynb` file is at the repo root:

## File Structure Expected:

```
📁 your-repo/                    ← Repo root (where .ipynb is located)
├── 📓 batching_video_data.ipynb ← Your main notebook
├── 📁 scripts/                  ← Your existing scripts folder
├── 📁 Batches/                  ← Optional: traditional structure
└── 📁 ../                       ← Parent directory where video batches live
    ├── 📁 Z7V_1641/             ← Video batch (parent dir = batch name)
    │   ├── 🎬 video1.mp4
    │   └── 🎬 video2.mp4
    ├── 📁 Z7V_1642/             ← Another batch
    │   └── 🎬 video3.mp4
    └── 📁 StockFootage/         ← Your organized stock footage
        └── 📁 Z7V_1643/
            └── 🎬 video5.mp4
```

## Cell Execution Order:

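1. **Cell 1**: Setup and imports - establishes repo root context
1. **Cell 1.5**: Configuration - sets scan targets, filters, and output locations (its helpers are required by Cell 6)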
1. **Cell 2**: Discovery functions - defines batch detection logic
1. **Cell 3**: Run discovery - finds batches in current or parent directory
1. **Cell 4**: Generate metadata - creates comprehensive batch information
1. **Cell 5**: Create DataFrame - converts to pandas for analysis
1. **Cell 6**: Save results - exports JSON and CSV files to repo root
1. **Cell 7**: Interactive analysis - provides functions for exploration
1. **Cell 8**: Utility functions - additional tools for processing

## Key Features:

- **Auto-detects repo location**: Uses `Path.cwd()` as the repo root, so start the kernel from the directory that contains the notebook
- **Smart directory scanning**: Checks current directory first, then parent
- **Batch identification**: Groups videos by their parent directory names
- **Comprehensive metadata**: Generates JSON with all batch information
- **DataFrame creation**: Makes data analysis easy with pandas
- **File output**: Saves results to repo root for version control
- **Interactive tools**: Provides functions for exploring batches

## Usage Pattern:

1. Place notebook at repo root
1. Run all cells sequentially
1. Use interactive functions to analyze specific batches
1. Generated files (`batch_metadata.json`, `video_inventory.csv`) stay in repo
1. Can be deployed anywhere with the workspace setup scripts

This approach keeps your notebook portable while maintaining the ability to discover and process video batches regardless of where they’re located relative to your repo.

In [None]:
# Cell 1: Setup and Imports
import os
import json
import glob
from pathlib import Path
from collections import defaultdict
import hashlib
from datetime import datetime
import pandas as pd

# Treat the kernel's current working directory as the repo root.
# Nothing is changed on disk; the path is only recorded for later cells.
repo_root = Path.cwd()
print(f"📂 Repository root: {repo_root}")

# Lower-case file suffixes (leading dot included) that count as video files
VIDEO_EXTENSIONS = {
    '.mp4', '.mov', '.avi', '.mkv',
    '.m4v', '.wmv', '.flv', '.webm',
}

print("✅ Setup complete")

In [None]:
# Cell 1.5: Configuration - Insert this between Cell 1 and Cell 2
# =======================================================================
# CONFIGURATION CELL - Customize your batch discovery behavior
# =======================================================================
# Every name below is a plain module-level constant that the helper functions
# defined later in this cell read at call time.

# Target directories to scan (if empty, scans current and parent directories)
# get_target_directories() expands "~", resolves each entry to an absolute
# path, and skips (with a warning) entries that are not existing directories.
TARGET_DIRECTORIES = [
    # Examples (uncomment and modify as needed):
    # ".",                          # Current directory (repo root)
    # "..",                         # Parent directory
    # "../StockFootage",            # Specific path
    # "/path/to/video/storage",     # Absolute path
    # "~/Videos/Batches",           # Home directory relative
]

# Directory names to specifically include (if empty, includes all directories with videos)
# Entries are fnmatch patterns tested against the directory name only.
INCLUDE_BATCH_NAMES = [
    # Examples (uncomment and modify as needed):
    # "Z7V_*",                      # Wildcard pattern
    # "Batch*",                     # Another wildcard
    # "StockFootage",               # Specific directory name
    # "2024*",                      # Year-based batches
]

# Directory names to ignore/exclude
# Entries are fnmatch patterns compared case-insensitively; the ignore list is
# checked before the include list, so a match here always wins.
IGNORE_DIRECTORY_NAMES = [
    # Common directories to skip
    ".git", ".vscode", "__pycache__", ".DS_Store", "node_modules",
    ".ipynb_checkpoints", "venv", "env", ".venv",
    
    # Add your custom exclusions:
    # "temp", "backup", "old", "archive", "trash",
    # "rendered", "exports", "thumbnails",
]

# File size filters (optional)
# A value of 0 disables the corresponding limit in should_include_video_file().
MIN_FILE_SIZE_MB = 0        # Minimum file size in MB (0 = no minimum)
MAX_FILE_SIZE_GB = 50       # Maximum file size in GB (0 = no maximum, 50 = reasonable default)

# Additional video extensions (beyond the defaults)
# Entries are lower-cased and merged with VIDEO_EXTENSIONS into
# ALL_VIDEO_EXTENSIONS; write them with the leading dot, like the defaults.
ADDITIONAL_VIDEO_EXTENSIONS = [
    # Add any custom video formats you use:
    # ".mxf", ".prores", ".dnxhd", ".r3d", ".braw"
]

# Batch naming options
# transform_batch_name() strips prefixes, then suffixes, then applies the case
# transform; any transform value other than the listed options leaves the
# name's case unchanged.
BATCH_NAME_TRANSFORM = "none"  # Options: "none", "uppercase", "lowercase", "title"
REMOVE_BATCH_PREFIXES = []     # Remove these prefixes from batch names, e.g., ["Batch_", "Video_"]
REMOVE_BATCH_SUFFIXES = []     # Remove these suffixes from batch names, e.g., ["_raw", "_temp"]

# Output file locations (relative to repo root or absolute paths)
# get_output_path() creates the directory if it does not exist yet.
OUTPUT_DIRECTORY = "."         # Where to save metadata files ("." = repo root)
METADATA_FILENAME = "batch_metadata.json"
INVENTORY_FILENAME = "video_inventory.csv"
WORKSPACE_INFO_FILENAME = "workspace_info.json"

# Session-specific metadata file (useful for temporary/local runs)
# When set to a non-empty value this location replaces OUTPUT_DIRECTORY for
# every file written through get_output_path().
SESSION_METADATA_LOCATION = None  # None = use OUTPUT_DIRECTORY, or specify custom path
# Examples:
# SESSION_METADATA_LOCATION = "../session_data"     # Parent directory
# SESSION_METADATA_LOCATION = "/tmp/video_session"  # Temporary location
# SESSION_METADATA_LOCATION = "~/Desktop"           # Desktop for easy access

# Echo the effective configuration so each run documents its own settings
_config_summary = (
    "⚙️ Configuration loaded:",
    f"   Target directories: {TARGET_DIRECTORIES or 'Auto-detect (current and parent)'}",
    f"   Include patterns: {INCLUDE_BATCH_NAMES or 'All directories with videos'}",
    f"   Ignore directories: {len(IGNORE_DIRECTORY_NAMES)} patterns",
    f"   File size range: {MIN_FILE_SIZE_MB}MB - {MAX_FILE_SIZE_GB}GB",
    f"   Additional video formats: {ADDITIONAL_VIDEO_EXTENSIONS or 'None'}",
    f"   Output directory: {OUTPUT_DIRECTORY}",
    f"   Session metadata: {SESSION_METADATA_LOCATION or 'Same as output directory'}",
    "✅ Ready to discover batches with custom settings",
)
print(*_config_summary, sep="\n")

# Helper functions for configuration
import fnmatch
from pathlib import Path

def should_include_directory(dir_name, dir_path):
    """Decide whether a directory takes part in batch discovery.

    Args:
        dir_name: Bare directory name that the patterns are matched against.
        dir_path: Path of the directory; accepted for callers but not consulted.

    Returns:
        False when the name matches any IGNORE_DIRECTORY_NAMES pattern
        (case-insensitively). Otherwise True when INCLUDE_BATCH_NAMES is empty,
        or when the name matches at least one of its patterns.
    """
    lowered_name = dir_name.lower()

    # The ignore list is evaluated first, so it overrides the include list
    if any(fnmatch.fnmatch(lowered_name, pattern.lower())
           for pattern in IGNORE_DIRECTORY_NAMES):
        return False

    # Without an include list every non-ignored directory qualifies
    if not INCLUDE_BATCH_NAMES:
        return True

    return any(fnmatch.fnmatch(dir_name, pattern)
               for pattern in INCLUDE_BATCH_NAMES)

def transform_batch_name(batch_name, prefixes=None, suffixes=None, transform=None):
    """Apply the batch-name clean-up rules to a raw directory name.

    Args:
        batch_name: Name to transform.
        prefixes: Prefixes to strip from the start of the name, tried in
            order. Defaults to REMOVE_BATCH_PREFIXES.
        suffixes: Suffixes to strip from the end of the name, tried in order.
            Defaults to REMOVE_BATCH_SUFFIXES.
        transform: "uppercase", "lowercase" or "title"; any other value keeps
            the case as is. Defaults to BATCH_NAME_TRANSFORM.

    Returns:
        The transformed name.
    """
    if prefixes is None:
        prefixes = REMOVE_BATCH_PREFIXES
    if suffixes is None:
        suffixes = REMOVE_BATCH_SUFFIXES
    if transform is None:
        transform = BATCH_NAME_TRANSFORM

    name = batch_name

    # Remove prefixes (empty entries are ignored)
    for prefix in prefixes:
        if prefix and name.startswith(prefix):
            name = name[len(prefix):]

    # Remove suffixes. An empty suffix must be skipped: it "matches" every
    # name, and name[:-0] would evaluate to the empty string.
    for suffix in suffixes:
        if suffix and name.endswith(suffix):
            name = name[:-len(suffix)]

    # Apply case transformation
    if transform == "uppercase":
        name = name.upper()
    elif transform == "lowercase":
        name = name.lower()
    elif transform == "title":
        name = name.title()

    return name

def should_include_video_file(file_path, min_size_mb=None, max_size_gb=None):
    """Check whether a video file falls inside the configured size limits.

    Args:
        file_path: Path (or path string) of the file to test.
        min_size_mb: Lower bound in MB; 0 disables it. Defaults to
            MIN_FILE_SIZE_MB.
        max_size_gb: Upper bound in GB; 0 disables it. Defaults to
            MAX_FILE_SIZE_GB.

    Returns:
        False when the file is smaller than the minimum or larger than the
        maximum, True otherwise. A file whose size cannot be read is included.
    """
    if min_size_mb is None:
        min_size_mb = MIN_FILE_SIZE_MB
    if max_size_gb is None:
        max_size_gb = MAX_FILE_SIZE_GB

    try:
        size_bytes = Path(file_path).stat().st_size
    except OSError:
        # Only filesystem failures are tolerated; other errors (and Ctrl-C)
        # propagate instead of being silently swallowed.
        return True  # If we can't check size, include it

    size_mb = size_bytes / (1024**2)
    size_gb = size_bytes / (1024**3)

    if min_size_mb > 0 and size_mb < min_size_mb:
        return False

    if max_size_gb > 0 and size_gb > max_size_gb:
        return False

    return True

def get_target_directories():
    """Return the directories that batch discovery should scan.

    With TARGET_DIRECTORIES empty, the relative paths "." and ".." are
    returned. Otherwise each configured entry is expanded ("~") and resolved;
    entries that are not existing directories are reported and left out.
    """
    # Nothing configured: fall back to current + parent directory
    if not TARGET_DIRECTORIES:
        return [Path("."), Path("..")]

    resolved_targets = []
    for raw_target in TARGET_DIRECTORIES:
        candidate = Path(raw_target).expanduser().resolve()
        # is_dir() is False for missing paths as well as for non-directories
        if not candidate.is_dir():
            print(f"⚠️ Target directory not found: {raw_target}")
            continue
        resolved_targets.append(candidate)
    return resolved_targets

def get_output_path(filename):
    """Build the absolute path for an output file, creating its directory.

    The directory is SESSION_METADATA_LOCATION when that is set to a non-empty
    value, otherwise OUTPUT_DIRECTORY. It is expanded ("~"), resolved, and
    created (including parents) if it does not exist yet.
    """
    # A configured session location takes precedence over the output directory
    base_setting = SESSION_METADATA_LOCATION or OUTPUT_DIRECTORY
    target_dir = Path(base_setting).expanduser().resolve()
    target_dir.mkdir(parents=True, exist_ok=True)
    return target_dir / filename

def get_metadata_paths():
    """Map each output kind to its resolved file path.

    Returns:
        Dict with the keys 'metadata', 'inventory' and 'workspace_info', each
        holding the path produced by get_output_path() for the configured
        filename.
    """
    configured_names = (
        ('metadata', METADATA_FILENAME),
        ('inventory', INVENTORY_FILENAME),
        ('workspace_info', WORKSPACE_INFO_FILENAME),
    )
    return {kind: get_output_path(name) for kind, name in configured_names}

# Update video extensions with additional formats.
# Entries are trimmed, lower-cased and given a leading dot when it is missing,
# because they are compared against Path.suffix values such as ".mp4".
# Blank entries are dropped.
ALL_VIDEO_EXTENSIONS = VIDEO_EXTENSIONS.union(
    ext if ext.startswith(".") else f".{ext}"
    for ext in (raw.strip().lower() for raw in ADDITIONAL_VIDEO_EXTENSIONS)
    if ext
)

print(f"🎯 Will scan {len(get_target_directories())} target directories")
print(f"📹 Monitoring {len(ALL_VIDEO_EXTENSIONS)} video formats: {sorted(ALL_VIDEO_EXTENSIONS)}")
print("💾 Output files will be saved to:")
# Note: get_metadata_paths() creates the output directory as a side effect.
# The name `paths` is kept at module level because a later cell reads it.
paths = get_metadata_paths()
for file_type, path in paths.items():
    print(f"   {file_type}: {path}")

In [None]:
# ============================================================================

# Cell 2: Discovery Functions
def discover_video_batches(search_path="."):
    """Discover video batches: one batch per immediate sub-directory with videos.

    The Cell 1.5 configuration is honoured: directories are filtered through
    should_include_directory(), files must carry a suffix from
    ALL_VIDEO_EXTENSIONS and pass should_include_video_file(), and batch names
    go through transform_batch_name(). Hidden directories (leading ".") are
    always skipped. Only files directly inside a batch directory are
    considered; nested folders are not searched.

    Args:
        search_path: Directory whose sub-directories are inspected.

    Returns:
        Dict mapping batch name to a list of per-video dicts with the keys
        'filename', 'path' (relative to search_path), 'full_path',
        'size_bytes', 'size_mb', 'size_gb', 'modified' (ISO timestamp) and
        'batch'. Directories without matching videos are omitted.
    """
    search_root = Path(search_path)
    batches = defaultdict(list)

    print(f"🔍 Scanning for video batches in: {search_root.absolute()}")

    # Sorted iteration keeps batch and video order reproducible between runs
    for item in sorted(search_root.iterdir()):
        if not item.is_dir() or item.name.startswith('.'):
            continue
        if not should_include_directory(item.name, item):
            continue

        batch_name = transform_batch_name(item.name)

        try:
            entries = sorted(item.iterdir())
        except OSError as exc:
            # One unreadable folder should not abort the whole scan
            print(f"⚠️ Skipping unreadable directory '{item}': {exc}")
            continue

        videos = []
        for video_file in entries:
            if not video_file.is_file():
                continue
            if video_file.suffix.lower() not in ALL_VIDEO_EXTENSIONS:
                continue
            if not should_include_video_file(video_file):
                continue

            try:
                # One stat() call supplies both size and modification time
                file_stat = video_file.stat()
            except OSError as exc:
                print(f"⚠️ Skipping unreadable file '{video_file}': {exc}")
                continue

            videos.append({
                'filename': video_file.name,
                'path': str(video_file.relative_to(search_root)),
                'full_path': str(video_file.absolute()),
                'size_bytes': file_stat.st_size,
                'size_mb': round(file_stat.st_size / (1024**2), 2),
                'size_gb': round(file_stat.st_size / (1024**3), 3),
                'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
                'batch': batch_name
            })

        if videos:
            # extend(): two directories may map to the same transformed name
            batches[batch_name].extend(videos)
            print(f"📁 Found batch '{batch_name}': {len(videos)} videos")

    return dict(batches)

def scan_current_directory():
    """Run batch discovery on the current working directory (".")."""
    current_dir = "."
    return discover_video_batches(search_path=current_dir)

def scan_parent_directory():
    """Run batch discovery on the parent of the working directory ("..").

    Useful when the batch folders sit next to the repo rather than inside it.
    """
    parent_dir = ".."
    return discover_video_batches(search_path=parent_dir)

print("✅ Discovery functions ready")

In [None]:
# ============================================================================

# Cell 3: Run Discovery
# When TARGET_DIRECTORIES is configured, every valid target is scanned and the
# results are merged (videos from same-named batches are combined).
# Otherwise: first try current directory, then parent if no batches found.
if TARGET_DIRECTORIES:
    batches = {}
    for target_dir in get_target_directories():
        for found_name, found_videos in discover_video_batches(target_dir).items():
            batches.setdefault(found_name, []).extend(found_videos)
else:
    batches = scan_current_directory()

    if not batches:
        print("🔄 No batches in current directory, checking parent...")
        batches = scan_parent_directory()

if not batches:
    print("❌ No video batches found!")
    print("Expected structure:")
    print("  📁 BatchName1/")
    print("  ├── 🎬 video1.mp4")
    print("  └── 🎬 video2.mp4")
else:
    print(f"\n🎯 Discovered {len(batches)} video batches")
    for batch_name in batches.keys():
        print(f"  📁 {batch_name}")

In [None]:
# ============================================================================

# Cell 4: Generate Comprehensive Metadata
def generate_batch_metadata(batches_dict):
    """Summarise discovered batches into one JSON-serialisable dict.

    Args:
        batches_dict: Mapping of batch name to its list of video dicts, as
            returned by discover_video_batches().

    Returns:
        Dict with 'scan_time', 'notebook_location', 'total_batches',
        'total_videos' and 'batches'. Each entry of 'batches' holds the video
        count, total size (bytes / MB / GB), the video list, an 8-character
        id taken from the MD5 of the batch name, the first video's filename
        and the latest 'modified' value.
    """
    scan_time = datetime.now().isoformat()

    per_batch = {}
    for batch_name, videos in batches_dict.items():
        size_in_bytes = sum(entry['size_bytes'] for entry in videos)
        # The digest only serves as a short stable identifier
        short_id = hashlib.md5(batch_name.encode()).hexdigest()[:8]

        per_batch[batch_name] = {
            'video_count': len(videos),
            'total_size_bytes': size_in_bytes,
            'total_size_mb': round(size_in_bytes / (1024**2), 2),
            'total_size_gb': round(size_in_bytes / (1024**3), 2),
            'videos': videos,
            'batch_id': short_id,
            'first_video': videos[0]['filename'] if videos else None,
            # ISO-formatted timestamps compare chronologically as strings
            'last_modified': max(entry['modified'] for entry in videos) if videos else None,
        }

    return {
        'scan_time': scan_time,
        'notebook_location': str(repo_root.absolute()),
        'total_batches': len(batches_dict),
        'total_videos': sum(len(videos) for videos in batches_dict.values()),
        'batches': per_batch,
    }

# Generate metadata
if batches:
    metadata = generate_batch_metadata(batches)
    # Add up exact byte counts and convert once: the per-batch GB figures are
    # already rounded to two decimals, so summing them under-reports
    # collections made of many small batches.
    total_bytes = sum(b['total_size_bytes'] for b in metadata['batches'].values())
    print(f"📊 Generated metadata for {metadata['total_batches']} batches")
    print(f"   Total videos: {metadata['total_videos']}")
    print(f"   Total size: {total_bytes / (1024**3):.2f} GB")
else:
    metadata = None

In [None]:
# ============================================================================

# Cell 5: Create DataFrame for Analysis
def create_video_dataframe(batches_dict):
    """Flatten the batch mapping into one DataFrame row per video.

    Args:
        batches_dict: Mapping of batch name to a list of video dicts carrying
            'filename', 'size_mb', 'size_gb', 'modified', 'path', 'full_path'.

    Returns:
        DataFrame with the columns batch_name, filename, size_mb, size_gb,
        modified_date (parsed with pd.to_datetime), path and full_path.
    """
    records = [
        {
            'batch_name': batch_name,
            'filename': entry['filename'],
            'size_mb': entry['size_mb'],
            'size_gb': entry['size_gb'],
            'modified_date': pd.to_datetime(entry['modified']),
            'path': entry['path'],
            'full_path': entry['full_path'],
        }
        for batch_name, batch_videos in batches_dict.items()
        for entry in batch_videos
    ]
    return pd.DataFrame(records)

if not batches:
    # Nothing was discovered; later cells test for None
    df_videos = None
else:
    df_videos = create_video_dataframe(batches)
    print(f"📈 Created DataFrame with {len(df_videos)} video records")
    print("\n🔍 Sample data:")
    display(df_videos.head())

    print("\n📊 Batch summary:")
    # Videos per batch and their combined size, rounded to two decimals
    batch_summary = (
        df_videos
        .groupby('batch_name')
        .agg({'filename': 'count', 'size_gb': 'sum'})
        .round(2)
        .rename(columns={'filename': 'video_count', 'size_gb': 'total_gb'})
    )
    display(batch_summary)

In [None]:
# Cell 6: Save Results (Enhanced with configuration support)
def save_batch_data(metadata, df_videos=None):
    """Write the metadata JSON, the optional CSV inventory and workspace info.

    Args:
        metadata: Dict produced by generate_batch_metadata() (may be None).
        df_videos: Optional DataFrame; written as CSV when not None.

    Returns:
        The dict of output paths obtained from get_metadata_paths().
    """
    def _write_json(payload, destination):
        # Shared writer for both JSON artefacts (2-space indentation)
        with open(destination, 'w') as handle:
            json.dump(payload, handle, indent=2)

    paths = get_metadata_paths()

    # Metadata JSON
    _write_json(metadata, paths['metadata'])
    print(f"💾 Saved metadata to: {paths['metadata']}")

    # CSV inventory, only when a DataFrame was supplied
    if df_videos is not None:
        df_videos.to_csv(paths['inventory'], index=False)
        print(f"💾 Saved video inventory to: {paths['inventory']}")

    # Workspace description: what was found plus the configuration in effect
    discovered = list(metadata['batches'].keys()) if metadata else []
    session_location = str(SESSION_METADATA_LOCATION) if SESSION_METADATA_LOCATION else None
    configuration = {
        'target_directories': TARGET_DIRECTORIES,
        'include_patterns': INCLUDE_BATCH_NAMES,
        'ignore_patterns': IGNORE_DIRECTORY_NAMES,
        'file_size_limits': {
            'min_mb': MIN_FILE_SIZE_MB,
            'max_gb': MAX_FILE_SIZE_GB,
        },
        'batch_naming': {
            'transform': BATCH_NAME_TRANSFORM,
            'remove_prefixes': REMOVE_BATCH_PREFIXES,
            'remove_suffixes': REMOVE_BATCH_SUFFIXES,
        },
    }
    workspace_info = {
        'workspace_type': 'video_batching',
        'structure': 'parent_directory_batching',
        'notebook_location': str(repo_root),
        'session_metadata_location': session_location,
        'output_directory': str(OUTPUT_DIRECTORY),
        'batches_discovered': discovered,
        # True exactly when at least one batch was discovered
        'ready_for_processing': bool(discovered),
        'last_scan': datetime.now().isoformat(),
        'configuration': configuration,
    }

    _write_json(workspace_info, paths['workspace_info'])
    print(f"💾 Saved workspace info to: {paths['workspace_info']}")

    return paths

def save_session_snapshot():
    """Write a timestamped summary of the current session to a JSON file.

    Reads the module-level `metadata` and `batches`. Returns the path of the
    written snapshot, or None when `metadata` is empty.
    """
    # Without metadata there is nothing worth recording
    if not metadata:
        return None

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    snapshot_path = get_output_path(f"session_snapshot_{timestamp}.json")
    captured_at = datetime.now().isoformat()

    # Per-batch totals: number of videos and combined size in GB
    per_batch = {}
    for batch_name, videos in batches.items():
        per_batch[batch_name] = {
            'video_count': len(videos),
            'total_gb': round(sum(v['size_gb'] for v in videos), 2),
        }

    snapshot_data = {
        'timestamp': captured_at,
        'session_id': timestamp,
        'batch_summary': per_batch,
        'total_batches': len(batches),
        'total_videos': sum(len(videos) for videos in batches.values()),
        'configuration_used': {
            'target_dirs': TARGET_DIRECTORIES,
            'session_location': SESSION_METADATA_LOCATION,
        },
    }

    with open(snapshot_path, 'w') as handle:
        json.dump(snapshot_data, handle, indent=2)
    print(f"📸 Session snapshot saved to: {snapshot_path}")
    return snapshot_path

if metadata:
    saved_files = save_batch_data(metadata, df_videos)
    snapshot_file = save_session_snapshot()
    print("✅ All data saved successfully!")
    # Report the directory from the paths this cell just wrote, rather than
    # from a `paths` variable left behind by an earlier cell.
    print(f"📂 Files saved to: {saved_files['metadata'].parent}")
else:
    print("❌ No data to save - no batches found")

In [None]:
# ============================================================================

# Cell 7: Interactive Analysis Functions
def analyze_batch(batch_name):
    """Print video count, total and average size, and the file list of a batch.

    Looks the batch up in the module-level `batches`; prints a notice and
    returns None when the name is unknown.
    """
    videos = batches.get(batch_name) if batch_name in batches else None
    if videos is None:
        print(f"❌ Batch '{batch_name}' not found")
        return

    combined_gb = sum(v['size_gb'] for v in videos)
    mean_mb = sum(v['size_mb'] for v in videos) / len(videos)

    print(f"🎬 Analyzing batch: {batch_name}")
    print(f"   Videos: {len(videos)}")
    print(f"   Total size: {combined_gb:.2f} GB")
    print(f"   Average size: {mean_mb:.1f} MB")

    print("\n📋 Video list:")
    for entry in videos:
        print(f"   • {entry['filename']} ({entry['size_mb']} MB)")

def list_all_batches():
    """Print one summary line (video count, total GB) per discovered batch."""
    if batches:
        print("📁 All discovered batches:")
        for name, entries in batches.items():
            size_gb = sum(entry['size_gb'] for entry in entries)
            print(f"   {name}: {len(entries)} videos, {size_gb:.2f} GB")
        return

    print("❌ No batches found")

# Announce the interactive helpers, then show the batch overview
if not batches:
    print("⚠️ No batches available for analysis")
else:
    print(
        "🎯 Interactive Analysis Ready!",
        "\nAvailable functions:",
        "• list_all_batches() - Show all batches",
        "• analyze_batch('batch_name') - Analyze specific batch",
        "• df_videos - Pandas DataFrame with all video data",
        "• metadata - Complete metadata dictionary",
        sep="\n",
    )

    # Auto-run summary
    list_all_batches()

In [None]:
# ============================================================================

# Cell 8: Utility Functions for Further Processing
def get_batch_paths(batch_name):
    """Return the 'full_path' of every video in a batch ([] if unknown)."""
    # An unknown batch name yields an empty list instead of raising
    return [entry['full_path'] for entry in batches.get(batch_name, [])]

def get_largest_videos(n=5):
    """Return the n largest videos by size_gb, or None without a DataFrame.

    The result keeps the columns batch_name, filename and size_gb.
    """
    if df_videos is None:
        return None
    shown_columns = ['batch_name', 'filename', 'size_gb']
    top_rows = df_videos.nlargest(n, 'size_gb')
    return top_rows[shown_columns]

def get_recent_videos(days=7):
    """Return videos modified within the last `days` days, or None without data.

    The result keeps the columns batch_name, filename, modified_date, size_gb.
    """
    if df_videos is None:
        return None
    # Rows strictly newer than "now minus N days" are kept
    threshold = pd.Timestamp.now() - pd.Timedelta(days=days)
    is_recent = df_videos['modified_date'] > threshold
    shown_columns = ['batch_name', 'filename', 'modified_date', 'size_gb']
    return df_videos[is_recent][shown_columns]

# List the helpers defined in this cell
print(
    "🛠️ Utility functions loaded:",
    "• get_batch_paths(batch_name) - Get file paths for batch",
    "• get_largest_videos(n=5) - Find largest videos",
    "• get_recent_videos(days=7) - Find recently modified videos",
    sep="\n",
)

# Show a small demo only when discovery produced data
if df_videos is not None:
    print("\n🔍 Example - 3 largest videos:")
    display(get_largest_videos(3))

print("\n✅ Video batch processing notebook ready!")

# Connecting to DaVinci Resolve Network Project Postgres Data

In [None]:
# /scripts/ingest_to_resolve.py
#
# Stand-alone script: creates one Media Pool bin per batch in a Resolve
# project, imports the batch's clips into it and tags them.
# It relies on __file__, so run it as a script file; __file__ is not defined
# when this listing is executed as a notebook cell.

import json
from pathlib import Path

import DaVinciResolveScript as bmd

PROJECT_NAME = "StockFootage Library"

# 1. load your JSON metadata (written to the repo root by the notebook)
metadata_file = Path(__file__).parent.parent / "batch_metadata.json"
with open(metadata_file, encoding="utf-8") as f:
    meta = json.load(f)

# 2. grab the Resolve scripting API
resolve = bmd.scriptapp("Resolve")
if resolve is None:
    raise RuntimeError("Could not connect to DaVinci Resolve - is it running with scripting enabled?")
pm = resolve.GetProjectManager()
proj = pm.LoadProject(PROJECT_NAME)
if proj is None:
    raise RuntimeError(f"Could not load Resolve project {PROJECT_NAME!r}")
mp = proj.GetMediaPool()
root = mp.GetRootFolder()

# 3. for each batch, make a bin & import its clips
for batch_name, info in meta["batches"].items():
    # reuse an existing bin of that name, otherwise create it
    existing_bins = {folder.GetName(): folder for folder in (root.GetSubFolderList() or [])}
    batch_bin = existing_bins.get(batch_name) or mp.AddSubFolder(root, batch_name)
    if not batch_bin:
        print(f"⚠️ Could not create or find bin '{batch_name}', skipping")
        continue

    # import media into that bin: imports land in the *current* folder, so
    # the bin has to be selected first
    mp.SetCurrentFolder(batch_bin)
    clip_paths = [v["full_path"] for v in info["videos"]]
    if not mp.ImportMedia(clip_paths):
        print(f"⚠️ No clips were imported for batch '{batch_name}'")

    # tag each clip with metadata
    comment = f"{info['video_count']} clips, total {info['total_size_gb']:.2f} GB"
    for clip in batch_bin.GetClipList():
        # set the “Scene” field to your batch name
        clip.SetClipProperty("Scene", batch_name)
        # set the “Comment” field to maybe your description or tags
        # NOTE(review): confirm the exact property name used by your Resolve version
        clip.SetClipProperty("Comment", comment)

# 4. save the project (saving is a ProjectManager operation)
pm.SaveProject()
print("✅ Ingest + tagging complete.")

# Writing BlackBox Global's `metadata.xml`

In [None]:
import pandas as pd

# Location of the BlackBox workbook (relative to the working directory)
template_path = "BlackBox Metadata Template MAY 10 2024.xlsx"

# Read both sheets in a single pass; pandas returns a dict keyed by sheet name
_template_sheets = pd.read_excel(template_path, sheet_name=["template", "dataDropdown"])

# Main metadata table
df_meta = _template_sheets["template"]

# (Optional) dropdown values for validation
df_dd = _template_sheets["dataDropdown"]

In [None]:
# Example: assembling one row
new_row = {
    "File Name": "Z7V_1641/video1.mp4",
    "Description (min 15, max 200 characters, must be least 5 words)": "A wide shot of...",
    "Keywords (min 8, max 49, separated by comma, and no repetition)": "drone, landscape, 4k, nature, wide, aerial, motion, shot",
    "Category (use dropdown menu)": "Nature",
    "Editorial (use dropdown menu)": False,
    # etc… fill every required column
}
# DataFrame.append was removed in pandas 2.0; concat with a one-row frame is
# the supported equivalent. Columns missing from new_row are filled with NaN.
# Re-running this cell adds the row again. For many rows, collect the dicts in
# a list and concatenate once.
df_meta = pd.concat([df_meta, pd.DataFrame([new_row])], ignore_index=True)

In [None]:
# Write df_meta back into the "template" sheet of the existing workbook.
# mode="a" opens the file for appending instead of replacing it;
# if_sheet_exists="overlay" writes into the sheet that is already there.
_writer_options = dict(engine="openpyxl", mode="a", if_sheet_exists="overlay")
with pd.ExcelWriter(template_path, **_writer_options) as xlsx_writer:
    df_meta.to_excel(xlsx_writer, sheet_name="template", index=False)

In [None]:
import re
import xml.etree.ElementTree as ET


def _xml_tag(column_name):
    """Turn a spreadsheet column header into a valid XML element name.

    The parenthesised entry instructions are dropped, e.g.
    "Category (use dropdown menu)" -> "Category", and every character that is
    not allowed in an element name is removed ("File Name" -> "FileName").
    ElementTree does not validate names, so writing the raw headers would
    produce a file that XML parsers reject.
    """
    without_hint = re.sub(r"\(.*?\)", "", str(column_name))
    tag = re.sub(r"[^A-Za-z0-9_.-]", "", without_hint)
    # Element names must start with a letter or underscore
    if not tag or not (tag[0].isalpha() or tag[0] == "_"):
        tag = f"_{tag}"
    return tag


# NOTE(review): confirm the element names BlackBox expects for each column.
root = ET.Element("MediaMetaData")
for _, row in df_meta.iterrows():
    clip = ET.SubElement(root, "Clip")
    for col in df_meta.columns:
        child = ET.SubElement(clip, _xml_tag(col))
        value = row[col]
        # Empty spreadsheet cells arrive as NaN; emit an empty element rather
        # than the literal text "nan"
        child.text = "" if pd.isna(value) else str(value)
tree = ET.ElementTree(root)
tree.write("metadata.xml", encoding="utf-8", xml_declaration=True)

## With this flow you’ll have:

- Excel-driven, human-friendly template for metadata entry & validation
- automated sync from your Python pipeline into that template
- XML export for BlackBox Global ingestion
- Postgres backend feeding DaVinci Resolve’s shared library