In [1]:
import os
import polars as pl
import re
from typing import List, Dict, Any
import uuid
from dotenv import load_dotenv
import sys

load_dotenv()

sys.path.append(os.path.dirname(os.getcwd()))


# Import StashApp client
from libraries.client_stashapp import StashAppClient, get_stashapp_client


# Initialize clients
stash_client = StashAppClient()
stash_raw_client = get_stashapp_client()

# Define paths to scan
PATHS = [
    r"F:\Culture\Staging",
    r"W:\Culture\Videos",
    r"X:\Culture\Videos",
    r"Y:\Culture\Videos",
    r"Z:\Culture\Videos",
]

# Define Culture Extractor URL pattern
CULTURE_EXTRACTOR_URL_PATTERN = "https://culture.extractor/galleries/"


def is_valid_uuid(uuid_str: str) -> bool:
    """Check if a string is a valid UUID."""
    try:
        uuid_obj = uuid.UUID(uuid_str)
        return str(uuid_obj) == uuid_str
    except ValueError:
        return False


def extract_uuid_from_filename(filename: str) -> str | None:
    """Extract UUID from filename if it exists."""
    # Match UUID pattern at the end of filename before extension
    match = re.search(
        r"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.", filename
    )
    if match and is_valid_uuid(match.group(1)):
        return match.group(1)
    return None


def get_existing_ce_url(existing_urls: List[str], ce_uuid: str) -> str | None:
    """Get existing Culture Extractor URL if it exists for the given UUID."""
    expected_url = f"{CULTURE_EXTRACTOR_URL_PATTERN}{ce_uuid}"
    return expected_url if expected_url in existing_urls else None

dUsing stash (v0.28.1-95-g7e9d8b78) endpoint at http://localhost:6969/graphql
dPersisting Connection to Stash with ApiKey...
dUsing stash (v0.28.1-95-g7e9d8b78) endpoint at http://localhost:6969/graphql
dPersisting Connection to Stash with ApiKey...


In [2]:
# Get all galleries with their files and existing URLs
galleries = stash_raw_client.find_galleries(
    fragment="""
    id
    title
    urls
    files {
        id
        basename
        path
    }
    """
)

# Process galleries and files to extract UUIDs from filenames
results = []

for gallery in galleries:
    gallery_id = gallery.get("id")
    gallery_title = gallery.get("title")
    existing_urls = gallery.get("urls", [])
    files = gallery.get("files", [])
    for file in files:
        file_basename = file.get("basename")
        file_path = file.get("path")
        # Only consider files in the specified PATHS
        if not any(str(file_path).startswith(p) for p in PATHS):
            continue
        ce_uuid = extract_uuid_from_filename(file_basename)
        results.append(
            {
                "gallery_id": gallery_id,
                "gallery_title": gallery_title,
                "file_basename": file_basename,
                "file_path": file_path,
                "ce_uuid": ce_uuid,
                "existing_urls": existing_urls,
            }
        )

# Ensure all dicts have the same keys
all_keys = {k for d in results for k in d.keys()}
for d in results:
    for k in all_keys:
        if k not in d:
            d[k] = None

# Ensure all ce_uuid values are str or None
for d in results:
    if d["ce_uuid"] is not None:
        d["ce_uuid"] = str(d["ce_uuid"])
    else:
        d["ce_uuid"] = None

# Build DataFrame with explicit schema override for ce_uuid column
files_df = pl.DataFrame(
    results, schema_overrides={"ce_uuid": pl.Utf8}, infer_schema_length=1000
)

# Filter to only files with a found UUID
files_with_uuid_df = files_df.filter(pl.col("ce_uuid").is_not_null())

In [3]:
# Filter out galleries that already have the matching Culture Extractor URL
galleries_to_update = []
galleries_already_set = []

for row in files_with_uuid_df.iter_rows(named=True):
    existing_urls = row["existing_urls"] or []
    existing_ce_url = get_existing_ce_url(existing_urls, row["ce_uuid"])

    # Check if the Culture Extractor URL already exists
    if existing_ce_url:
        galleries_already_set.append(row)
    else:
        galleries_to_update.append(row)

# Create DataFrames for verification
galleries_to_update_df = (
    pl.DataFrame(galleries_to_update) if galleries_to_update else pl.DataFrame()
)
galleries_already_set_df = (
    pl.DataFrame(galleries_already_set) if galleries_already_set else pl.DataFrame()
)

print(f"Total galleries with UUIDs found: {len(files_with_uuid_df)}")
print(f"Galleries that need updating: {len(galleries_to_update_df)}")
print(f"Galleries already set (skipped): {len(galleries_already_set_df)}")

if len(galleries_already_set_df) > 0:
    print("\nGalleries already set with matching Culture Extractor URL:")
    print(
        galleries_already_set_df.select(
            ["gallery_id", "gallery_title", "file_basename", "ce_uuid"]
        )
    )

print("\nGalleries to be updated:")
galleries_to_update_df

Total galleries with UUIDs found: 414
Galleries that need updating: 132
Galleries already set (skipped): 282

Galleries already set with matching Culture Extractor URL:
shape: (282, 4)
┌────────────┬───────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ gallery_id ┆ gallery_title ┆ file_basename                   ┆ ce_uuid                         │
│ ---        ┆ ---           ┆ ---                             ┆ ---                             │
│ str        ┆ str           ┆ str                             ┆ str                             │
╞════════════╪═══════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ 10497      ┆               ┆ Sexy Hub꞉ TransAngels - 2019-0… ┆ 0195b17d-0287-7403-9b06-03201c… │
│ 10498      ┆               ┆ Sexy Hub꞉ TransAngels - 2023-1… ┆ 0195bc93-bb30-72f0-9a63-cb7ea0… │
│ 10500      ┆               ┆ Sexy Hub꞉ Dane Jones - 2012-01… ┆ 01959e92-f966-74a2-905f-83b169… │
│ 10501      ┆         

gallery_id,gallery_title,file_basename,file_path,ce_uuid,existing_urls
str,str,str,str,str,list[null]
"""10812""","""""","""Sexy Hub꞉ Lesbea - 2012-04-02 …","""F:\Culture\Staging\CHECKED_SEX…","""01959648-3873-7408-8d21-da81d0…",[]
"""10813""","""""","""Sexy Hub꞉ Lesbea - 2012-04-16 …","""F:\Culture\Staging\CHECKED_SEX…","""01959645-eef0-737f-84fa-d1bdc7…",[]
"""10814""","""""","""Sexy Hub꞉ Lesbea - 2013-02-06 …","""F:\Culture\Staging\CHECKED_SEX…","""01959628-59a7-7108-8000-19269a…",[]
"""10815""","""""","""Sexy Hub꞉ Lesbea - 2013-03-06 …","""F:\Culture\Staging\CHECKED_SEX…","""01959625-0a5c-7510-bca8-135bac…",[]
"""10816""","""""","""Sexy Hub꞉ Lesbea - 2012-09-12 …","""F:\Culture\Staging\CHECKED_SEX…","""01959631-b6a4-722b-920c-131394…",[]
…,…,…,…,…,…
"""10940""","""""","""Sexy Hub꞉ Lesbea - 2019-07-13 …","""F:\Culture\Staging\CHECKED_SEX…","""0195937f-3903-7634-a462-223746…",[]
"""10941""","""""","""Sexy Hub꞉ Lesbea - 2019-07-28 …","""F:\Culture\Staging\CHECKED_SEX…","""0195937c-7607-7574-859b-24a231…",[]
"""10942""","""""","""Sexy Hub꞉ Lesbea - 2019-08-04 …","""F:\Culture\Staging\CHECKED_SEX…","""0195937b-4143-72c1-b084-255f20…",[]
"""10943""","""""","""Sexy Hub꞉ Lesbea - 2019-10-27 …","""F:\Culture\Staging\CHECKED_SEX…","""01959370-3d01-7499-b97d-0cfb8f…",[]


In [4]:
# Apply step: Update galleries with extracted UUIDs as URLs
update_results = []

for row in galleries_to_update_df.iter_rows(named=True):
    gallery_id = row["gallery_id"]
    ce_uuid = row["ce_uuid"]
    gallery_title = row["gallery_title"]
    file_basename = row["file_basename"]
    existing_urls = row["existing_urls"] or []

    # Create the Culture Extractor URL
    ce_url = f"{CULTURE_EXTRACTOR_URL_PATTERN}{ce_uuid}"

    try:
        # Add the new URL to existing URLs
        updated_urls = existing_urls + [ce_url]

        # Update the gallery with the new URL list
        result = stash_raw_client.update_gallery(
            {
                "id": gallery_id,
                "urls": updated_urls,
            }
        )

        update_results.append(
            {
                "gallery_id": gallery_id,
                "gallery_title": gallery_title,
                "file_basename": file_basename,
                "ce_uuid": ce_uuid,
                "status": "success",
                "error": None,
            }
        )

        print(f"✓ Updated gallery {gallery_id} ({gallery_title}) with URL {ce_url}")

    except Exception as e:
        update_results.append(
            {
                "gallery_id": gallery_id,
                "gallery_title": gallery_title,
                "file_basename": file_basename,
                "ce_uuid": ce_uuid,
                "status": "error",
                "error": str(e),
            }
        )

        print(f"✗ Failed to update gallery {gallery_id} ({gallery_title}): {e}")

✓ Updated gallery 10812 () with URL https://culture.extractor/galleries/01959648-3873-7408-8d21-da81d06a71ef
✓ Updated gallery 10813 () with URL https://culture.extractor/galleries/01959645-eef0-737f-84fa-d1bdc7a6ab07
✓ Updated gallery 10814 () with URL https://culture.extractor/galleries/01959628-59a7-7108-8000-19269a504b41
✓ Updated gallery 10815 () with URL https://culture.extractor/galleries/01959625-0a5c-7510-bca8-135bac8d834e
✓ Updated gallery 10816 () with URL https://culture.extractor/galleries/01959631-b6a4-722b-920c-131394574bff
✓ Updated gallery 10817 () with URL https://culture.extractor/galleries/01959650-485e-7529-a83c-eccc099584c9
✓ Updated gallery 10818 () with URL https://culture.extractor/galleries/0195964f-2bf3-7281-9c5f-f27043a8a653
✓ Updated gallery 10819 () with URL https://culture.extractor/galleries/01959649-7cc4-70b4-89fe-6103890d4109
✓ Updated gallery 10820 () with URL https://culture.extractor/galleries/01959647-4369-75cb-8062-12df70b3b6f2
✓ Updated gallery 1

In [5]:
# Verification of apply step results
if update_results:
    update_results_df = pl.DataFrame(update_results)
    print(f"Total galleries processed: {len(update_results_df)}")
    print(
        f"Successful updates: {len(update_results_df.filter(pl.col('status') == 'success'))}"
    )
    print(
        f"Failed updates: {len(update_results_df.filter(pl.col('status') == 'error'))}"
    )

    # Show any errors
    errors_df = update_results_df.filter(pl.col("status") == "error")
    if len(errors_df) > 0:
        print("\nErrors encountered:")
        errors_df

    # Show successful updates
    success_df = update_results_df.filter(pl.col("status") == "success")
    success_df
else:
    print("No galleries needed updating.")


Total galleries processed: 132
Successful updates: 132
Failed updates: 0
