In [None]:
import polars as pl
import sys
import os

# os.path.abspath('') resolves to the current working directory, so this adds
# the *parent* of the working directory to the import path -- presumably where
# the project-local `libraries` package lives (verify against the repo layout).
sys.path.append(os.path.dirname(os.path.abspath('')))

from libraries.client_stashapp import get_stashapp_client, StashAppClient

# Two handles are used throughout this notebook: `stash` for the
# find_scenes / create_scene_marker calls, `stash_client` for the tag helpers
# (get_tags_by_names, update_tags_for_scenes).
stash = get_stashapp_client()
stash_client = StashAppClient()

# Look up the tags this notebook relies on. Later code reads the result via
# attributes (stash_tags.video_cut, stash_tags.marker_source_pyscenedetect,
# stash_tags.ai_reviewed) and indexes each entry with ['id'] / ['name'].
stash_tags = stash_client.get_tags_by_names(["Video Cut", "Marker Source: PySceneDetect", "AI_Reviewed"])

def get_pyscenedetect_csv_path(scene_data):
    """
    Build the path where a scene's PySceneDetect CSV sidecar is expected.

    The sidecar is looked for next to the scene's first file and is named
    "<basename>.Scenes.csv". Only the path is computed here; the file is not
    checked for existence.

    Args:
        scene_data: Scene dictionary whose 'files' list supplies 'path' and 'basename'

    Returns:
        str | None: Expected CSV path, or None when the scene has no files
    """
    files = scene_data.get("files")
    if not files:
        # Covers a missing key, None and an empty list alike.
        return None

    primary_file = files[0]
    video_path, file_name = primary_file["path"], primary_file["basename"]
    parent_dir = os.path.dirname(video_path)
    return os.path.join(parent_dir, file_name + ".Scenes.csv")

def has_pyscenedetect_markers(scene_data):
    """
    Tell whether a scene already carries a "Video Cut" marker.

    Args:
        scene_data: Scene dictionary whose 'scene_markers' list holds marker dicts

    Returns:
        bool: True when any marker's primary tag is named "Video Cut", False otherwise
    """
    markers = scene_data.get("scene_markers")
    if not markers:
        return False

    # Only the primary tag's name is inspected; a marker with no primary tag
    # can never match.
    return any(
        (marker.get("primary_tag") or {}).get("name") == "Video Cut"
        for marker in markers
    )

def has_been_ai_reviewed(scene_data):
    """
    Tell whether a scene carries the tag stored as stash_tags.ai_reviewed.

    Args:
        scene_data: Scene dictionary whose 'tags' list holds tag dicts with an 'id'

    Returns:
        bool: True when any tag id equals stash_tags.ai_reviewed["id"], False otherwise
    """
    # The reference id is read inside the generator, so a scene without tags
    # never touches stash_tags at all.
    return any(
        tag.get("id") == stash_tags.ai_reviewed["id"]
        for tag in scene_data.get("tags", [])
    )


In [None]:
# Fetch the scenes once and reuse the result in the cells below. No filter is
# passed, so this presumably returns every scene in the library. The fragment
# limits each scene to the fields read later: id, title, files (id, basename,
# path), tags (id, name) and each marker's primary tag (id, name).
all_stash_scenes = stash.find_scenes(fragment="id title files { id basename path } tags { id name } scene_markers { id primary_tag { id name }}")

In [None]:
# %%
# Get scene IDs for all scenes that have PySceneDetect CSV sidecar files
print("üîç Finding all scenes with PySceneDetect CSV sidecar files...")

# Classify the scenes held in all_stash_scenes into "needs processing" and
# "already processed"; scenes without a CSV sidecar on disk are ignored.
print("üìã Fetching all scenes from Stash...")
scenes_with_csv = []
scenes_already_processed = []
scene_count = 0  # stays 0 when there are no scenes at all

for scene_count, scene in enumerate(all_stash_scenes, start=1):
    if scene_count % 100 == 0:
        print(f"   Processed {scene_count} scenes...")

    # Get expected CSV path using our shared function
    csv_file_path = get_pyscenedetect_csv_path(scene)
    if not csv_file_path or not os.path.exists(csv_file_path):
        continue

    record = {
        'scene_id': scene['id'],
        # The fragment always returns a 'title' key, so a .get() default would
        # never apply; `or` also covers an empty or null title.
        'title': scene.get('title') or 'No Title',
        'csv_path': csv_file_path
    }

    # A scene counts as done when it was AI-reviewed or already has Video Cut markers
    if has_been_ai_reviewed(scene) or has_pyscenedetect_markers(scene):
        scenes_already_processed.append(record)
    else:
        scenes_with_csv.append(record)

print(f"\n‚úÖ Found {len(scenes_with_csv)} scenes with PySceneDetect CSV files that need processing")
print(f"üìù Found {len(scenes_already_processed)} scenes already processed (AI_Reviewed or have Video Cut markers)")
print(f"üìä Total scenes checked: {scene_count}")

# Convert to DataFrames for easy viewing (explicit empty columns keep the
# frame's shape stable when a list is empty)
df_scenes_with_csv = pl.DataFrame(scenes_with_csv) if scenes_with_csv else pl.DataFrame({"scene_id": [], "title": [], "csv_path": []})
df_scenes_already_processed = pl.DataFrame(scenes_already_processed) if scenes_already_processed else pl.DataFrame({"scene_id": [], "title": [], "csv_path": []})

print(f"\nüìä Scenes needing processing:")
# Bare last expression so the notebook renders the DataFrame as the cell output
df_scenes_with_csv


In [None]:
# %%
# Process scenes that need PySceneDetect markers
# Only process scenes that have CSV files but don't already have Video Cut markers

import csv
import os

# Encodings tried in order until one yields a recognisable scene list
CSV_ENCODINGS = ['utf-8-sig', 'utf-16', 'utf-8', 'cp1252', 'iso-8859-1']

# Column typing keyed by header name rather than by position, so the conversion
# does not depend on the exact column order of the export.
CSV_INT_COLUMNS = {"Scene Number", "Start Frame", "End Frame", "Length (frames)"}
CSV_FLOAT_COLUMNS = {"Start Time (seconds)", "End Time (seconds)", "Length (seconds)"}
# Columns the marker creation below actually reads
CSV_REQUIRED_COLUMNS = ["Scene Number", "Start Time (seconds)", "End Time (seconds)", "Length (seconds)"]


def _read_scene_list_csv(csv_file_path):
    """
    Read a PySceneDetect scene-list CSV, trying several encodings.

    The header row is located by content (the first row containing every
    required column) instead of by a fixed line number. That accepts files with
    or without the leading timecode-list row, and it rejects a decode that
    "succeeds" but produces garbage, so the next encoding gets a chance.

    Args:
        csv_file_path: Path of the CSV file to read

    Returns:
        tuple | None: (headers, data_rows) with blank lines dropped, or None
        when no encoding produced a usable scene list
    """
    for encoding in CSV_ENCODINGS:
        try:
            # newline='' lets the csv module do its own newline handling
            with open(csv_file_path, 'r', encoding=encoding, newline='') as f:
                rows = list(csv.reader(f))
        except UnicodeError as e:
            print(f"‚ùå Failed with {encoding}: {e}")
            continue
        except (OSError, csv.Error) as e:
            print(f"‚ùå Error with {encoding}: {e}")
            continue

        cleaned_rows = [[cell.strip() for cell in row] for row in rows]
        header_index = next(
            (i for i, row in enumerate(cleaned_rows)
             if all(column in row for column in CSV_REQUIRED_COLUMNS)),
            None,
        )
        if header_index is None:
            print(f"‚ùå Error with {encoding}: no header row with the expected columns")
            continue

        print(f"‚úÖ Successfully read CSV with encoding: {encoding}")
        data_rows = [row for row in cleaned_rows[header_index + 1:] if row]
        return cleaned_rows[header_index], data_rows

    return None


def _convert_scene_rows(headers, data_rows):
    """
    Cast each cell to int, float or str according to its column header.

    Args:
        headers: Column names of the scene list
        data_rows: Rows of string cells, one list per scene

    Returns:
        list: Rows with numeric columns converted

    Raises:
        ValueError: If a row has the wrong number of cells or a numeric cell
        cannot be parsed
    """
    converters = []
    for header in headers:
        if header in CSV_INT_COLUMNS:
            converters.append(int)
        elif header in CSV_FLOAT_COLUMNS:
            converters.append(float)
        else:
            converters.append(str)  # Keep timecode strings as-is

    converted = []
    for row_number, row in enumerate(data_rows, start=1):
        if len(row) != len(headers):
            raise ValueError(f"data row {row_number} has {len(row)} cells, expected {len(headers)}")
        converted.append([convert(cell) for convert, cell in zip(converters, row)])
    return converted


print(f"üîÑ Processing {len(scenes_with_csv)} scenes that need PySceneDetect markers...")

# Index the cached scenes once instead of rescanning the list for every scene
scenes_by_id = {s['id']: s for s in all_stash_scenes}

for scene_info in scenes_with_csv:
    # Find the full scene data
    scene = scenes_by_id.get(scene_info['scene_id'])
    if not scene:
        print(f"‚ùå Could not find scene data for ID {scene_info['scene_id']}")
        continue

    # Double-check that scene doesn't already have markers (safety check).
    # The cached scene is updated further down once markers are created, so
    # re-running this cell does not create duplicates.
    if has_pyscenedetect_markers(scene):
        print(f"‚ö†Ô∏è Scene {scene['id']} already has Video Cut markers, skipping...")
        continue

    print(f"\nüìπ Processing scene {scene['id']}: {scene.get('title') or 'No Title'}")

    # Use our shared function to get the CSV path
    csv_file_path = get_pyscenedetect_csv_path(scene)

    if not csv_file_path or not os.path.exists(csv_file_path):
        print(f"‚ùå CSV file not found: {csv_file_path}")
        continue

    print(f"üìä Reading CSV file: {csv_file_path}")

    parsed_csv = _read_scene_list_csv(csv_file_path)
    if parsed_csv is None:
        print(f"‚ùå Could not read CSV file with any encoding, skipping scene {scene['id']}")
        continue
    headers, data_rows = parsed_csv

    # Convert data to proper types for Polars DataFrame; a malformed file only
    # costs this one scene instead of aborting the whole batch
    try:
        processed_data = _convert_scene_rows(headers, data_rows)
    except ValueError as e:
        print(f"‚ùå Malformed CSV data ({e}), skipping scene {scene['id']}")
        continue

    # Create Polars DataFrame with proper schema
    df_scenes_raw = pl.DataFrame(processed_data, schema=headers, orient="row")

    # Transform CSV data to match existing DataFrame structure
    df_markers = df_scenes_raw.select([
        pl.col("Scene Number").alias("scene_number"),
        pl.col("Start Time (seconds)").alias("seconds"),
        pl.col("End Time (seconds)").alias("end_seconds"),
        pl.col("Length (seconds)").alias("length_seconds")
    ]).with_columns([
        # Ensure all columns are proper numeric types
        pl.col("scene_number").cast(pl.Int64),
        pl.col("seconds").cast(pl.Float64),
        pl.col("end_seconds").cast(pl.Float64),
        pl.col("length_seconds").cast(pl.Float64)
    ])

    print(f"üìä Found {len(df_markers)} scenes/cuts to process")

    # Create scene markers for each cut
    scene_id = scene["id"]
    primary_tag_id = stash_tags.video_cut['id']
    tag_ids = [stash_tags.marker_source_pyscenedetect['id']]

    results = []
    markers_created = 0

    for row in df_markers.iter_rows(named=True):
        marker_data = {
            "scene_id": scene_id,
            "title": stash_tags.video_cut['name'],
            "primary_tag_id": primary_tag_id,
            "seconds": row["seconds"],
            "end_seconds": row["end_seconds"],
            "tag_ids": tag_ids,
        }

        try:
            result = stash.create_scene_marker(marker_data)
            results.append(result)
            markers_created += 1
        except Exception as e:
            print(f"‚ùå Error creating marker: {e}")
            results.append(None)

    print(f"‚úÖ Created {markers_created} markers for scene {scene_id}")

    if markers_created:
        # Keep the cached scene in sync with Stash: the stub carries exactly
        # what has_pyscenedetect_markers() looks at, so the safety check above
        # skips this scene if the cell is run again.
        cached_markers = scene.get("scene_markers") or []
        cached_markers.append({"primary_tag": {"id": primary_tag_id, "name": "Video Cut"}})
        scene["scene_markers"] = cached_markers

    failed_markers = len(results) - markers_created
    if failed_markers:
        # Do not claim the scene is processed when some markers are missing
        print(f"‚ö†Ô∏è {failed_markers} markers failed for scene {scene_id}, not tagging it as processed")
        continue

    # Mark scene as processed
    try:
        stash_client.update_tags_for_scenes(
            [scene_id],
            ["Scenes: PySceneDetect: Processed"],
            []
        )
        print(f"‚úÖ Tagged scene {scene_id} as processed")
    except Exception as e:
        print(f"‚ùå Error tagging scene: {e}")

print(f"\nüéâ Completed processing all scenes with PySceneDetect CSV files!")

In [None]:
# %%
# Display scenes that already have markers (optional viewing)
# Note: df_scenes_already_processed is filled by the scan cell, which puts a
# scene here when it has Video Cut markers OR carries the AI_Reviewed tag.
print(f"üìù Scenes already processed with Video Cut markers:")
# Bare last expression so the notebook renders the DataFrame as the cell output
df_scenes_already_processed
