In [1]:
# Importing metadata from Culture Extractor to StashApp
# 1. Import metadata from Culture Extractor
# 2. Import metadata from StashApp by oshash
# 3. Join the two on oshash
# 4. Query metadata from StashDB by phash
# 5. Join the three on phash
# 6. Match performers between Culture Extractor, StashApp and StashDB
# 7. Set Culture Extractor UUIDs to performer custom fields in StashApp
# 8. Set metadata to StashApp scenes

In [2]:
import libraries.client_culture_extractor as client_culture_extractor
import os
import polars as pl
from dotenv import load_dotenv

load_dotenv()

# Culture Extractor: read the database connection settings from the environment
# (load_dotenv() above has already merged in any .env file). A variable that is
# not set comes back as None and is formatted into the string as "None".
user, pw, host, port, db = (
    os.getenv(variable)
    for variable in ("CE_DB_USERNAME", "CE_DB_PASSWORD", "CE_DB_HOST", "CE_DB_PORT", "CE_DB_NAME")
)

# Space-separated key=value connection string handed to the Culture Extractor client.
connection_string = " ".join(
    f"{key}={value}"
    for key, value in (("dbname", db), ("user", user), ("password", pw), ("host", host), ("port", port))
)

culture_extractor_client = client_culture_extractor.ClientCultureExtractor(connection_string)


# StashApp
from libraries.client_stashapp import StashAppClient, get_stashapp_client

# Two StashApp handles are used side by side in this notebook:
# - stash_client: project wrapper whose results are joined as polars frames below
# - stash_raw_client: client returned by get_stashapp_client(), used for direct
#   calls such as find_tags / find_scene / create_performer
stash_client = StashAppClient()
stash_raw_client = get_stashapp_client()


# StashDB
from libraries.StashDbClient import StashDbClient
import dotenv
import os

# NOTE(review): load_dotenv() was already called at the top of this cell; this
# second call looks redundant - confirm before removing.
dotenv.load_dotenv()

# StashDB client configured from the STASHDB_ENDPOINT / STASHDB_API_KEY environment variables.
stashbox_client = StashDbClient(
    os.getenv("STASHDB_ENDPOINT"),
    os.getenv("STASHDB_API_KEY"),
)


# Functions
def hex_to_binary(hex_string):
    """Convert a hexadecimal string to a string of binary digits.

    The result is left-padded with zeros to at least 64 characters; values
    wider than 64 bits are returned at their natural width.

    Raises:
        ValueError: if ``hex_string`` is not valid hexadecimal.
    """
    value = int(hex_string, 16)
    digits = bin(value)[2:]  # drop the "0b" prefix
    return digits.rjust(64, "0")

def calculate_hamming_distance(phash1, phash2):
    """Return the number of differing bits between two hexadecimal hashes.

    The hashes are compared as integers, so leading zeros do not matter and
    hashes of any width are supported. (The previous string-based comparison
    padded only to 64 bits and therefore raised a spurious "equal length"
    error for wider hashes whose leading bits happened to be zero.)

    Args:
        phash1: First hash as a hexadecimal string, e.g. "951428607cf7cb8f".
        phash2: Second hash as a hexadecimal string.

    Returns:
        The Hamming distance (count of bit positions that differ) as an int.

    Raises:
        ValueError: if either argument is not valid hexadecimal.
    """
    # XOR leaves a 1 exactly where the two hashes disagree; counting those
    # bits is the Hamming distance.
    differing_bits = int(phash1, 16) ^ int(phash2, 16)
    return bin(differing_bits).count("1")

# Example usage:
# phash1 = "951428607cf7cb8f"
# phash2 = "951428607cf7cb8e"
# distance = calculate_hamming_distance(phash1, phash2)
# print(f"Hamming distance between {phash1} and {phash2}: {distance}")

def levenshtein(s1: str, s2: str):
    """Case-insensitive Levenshtein distance between two strings.

    Returns None when either argument is empty or None, so callers can tell
    "nothing to compare" apart from a distance of zero.
    """
    if not (s1 and s2):
        return None

    # Imported inside the function: the package is only needed once a
    # distance is actually computed.
    from Levenshtein import distance

    left, right = s1.lower(), s2.lower()
    return distance(left, right)


dUsing stash (v0.27.2-37-g0621d871) endpoint at http://localhost:6969/graphql
dPersisting Connection to Stash with ApiKey...
dUsing stash (v0.27.2-37-g0621d871) endpoint at http://localhost:6969/graphql
dPersisting Connection to Stash with ApiKey...


In [3]:
# Show the Culture Extractor schema (table name / column name / data type) as a
# JSON string, for reference when reading the column names used below.
culture_extractor_client.get_database_schema().write_json()

'[{"table_name":"__EFMigrationsHistory","column_name":"migration_id","data_type":"character varying"},{"table_name":"__EFMigrationsHistory","column_name":"product_version","data_type":"character varying"},{"table_name":"downloads","column_name":"available_file","data_type":"json"},{"table_name":"downloads","column_name":"content_type","data_type":"text"},{"table_name":"downloads","column_name":"downloaded_at","data_type":"timestamp without time zone"},{"table_name":"downloads","column_name":"file_metadata","data_type":"json"},{"table_name":"downloads","column_name":"file_type","data_type":"text"},{"table_name":"downloads","column_name":"original_filename","data_type":"text"},{"table_name":"downloads","column_name":"release_uuid","data_type":"uuid"},{"table_name":"downloads","column_name":"saved_filename","data_type":"text"},{"table_name":"downloads","column_name":"uuid","data_type":"uuid"},{"table_name":"downloads","column_name":"variant","data_type":"text"},{"table_name":"performers",

In [4]:
# Reference data from StashApp and Culture Extractor.
all_tags = stash_raw_client.find_tags()
all_ce_sites = culture_extractor_client.get_sites()
all_ce_sub_sites = culture_extractor_client.get_sub_sites()
all_stash_studios = stash_client.get_studios()

# Pair each Culture Extractor site with the Stash studio of the same name.
# Left join: sites without a matching studio are kept with null studio columns.
all_ce_sites_stash_studios_joined = all_ce_sites.join(
    all_stash_studios,
    how="left",
    left_on="ce_sites_name",
    right_on="stash_studios_name",
    coalesce=False,
)

In [5]:
# Site to process in this run. Displayed for a visual check before the next
# cell writes to StashApp. str.contains performs a pattern match, so more than
# one studio row may be shown.
site_name = "Femjoy"
all_ce_sites_stash_studios_joined.filter(pl.col("stash_studios_name").str.contains(site_name))

ce_sites_uuid,ce_sites_short_name,ce_sites_name,ce_sites_url,stash_studios_id,stash_studios_name,stash_studios_url,stash_studios_stashdb_id,stash_studios_tpdb_id,stash_studios_ce_id,stash_studios_parent_studio_id,stash_studios_parent_studio_name,stash_studios_parent_studio_url,stash_studios_parent_studio_stashdb_id,stash_studios_parent_studio_tpdb_id,stash_studios_parent_studio_ce_id
str,str,str,str,i64,str,str,str,str,str,i64,str,str,str,str,str
"""019283a1-e6d7-767c-98dd-0fd1b1…","""femjoy""","""Femjoy""","""https://www.femjoy.com""",605,"""Femjoy""","""https://femjoy.com/""","""e3dba97c-2c0c-4402-b1ed-cbf977…","""4ebbb416-1c35-4826-8e13-61662e…","""019283a1-e6d7-767c-98dd-0fd1b1…",,,,,,


In [6]:
# Pick the studio row for `site_name` and record the Culture Extractor site UUID
# on the Stash studio as a stash_id for the Culture Extractor endpoint.
rows = all_ce_sites_stash_studios_joined.filter(pl.col("stash_studios_name").str.contains(site_name))

# Fail with a clear message before anything is written to StashApp
# (previously an empty result surfaced as a bare IndexError).
if rows.height == 0:
    raise ValueError(f"No Stash studio matches site name {site_name!r}; nothing was updated")

# NOTE(review): if several studios match the pattern, the first row is used -
# check the output of the previous cell before running this one.
selected_studio = rows.to_dicts()[0]
stash_client.set_studio_stash_id_for_endpoint(selected_studio["stash_studios_id"], "https://culture.extractor/graphql", selected_studio["ce_sites_uuid"])
selected_studio

{'ce_sites_uuid': '019283a1-e6d7-767c-98dd-0fd1b1dd83ac',
 'ce_sites_short_name': 'femjoy',
 'ce_sites_name': 'Femjoy',
 'ce_sites_url': 'https://www.femjoy.com',
 'stash_studios_id': 605,
 'stash_studios_name': 'Femjoy',
 'stash_studios_url': 'https://femjoy.com/',
 'stash_studios_stashdb_id': 'e3dba97c-2c0c-4402-b1ed-cbf97717faee',
 'stash_studios_tpdb_id': '4ebbb416-1c35-4826-8e13-61662e605757',
 'stash_studios_ce_id': '019283a1-e6d7-767c-98dd-0fd1b1dd83ac',
 'stash_studios_parent_studio_id': None,
 'stash_studios_parent_studio_name': None,
 'stash_studios_parent_studio_url': None,
 'stash_studios_parent_studio_stashdb_id': None,
 'stash_studios_parent_studio_tpdb_id': None,
 'stash_studios_parent_studio_ce_id': None}

In [7]:
# All Culture Extractor downloads for the selected site: one row per downloaded
# file (video, cover image, gallery zip, ...), with release metadata repeated on each row.
downloads = culture_extractor_client.get_downloads(selected_studio["ce_sites_uuid"])
downloads

ce_downloads_site_uuid,ce_downloads_site_name,ce_downloads_sub_site_name,ce_downloads_release_uuid,ce_downloads_release_date,ce_downloads_release_short_name,ce_downloads_release_name,ce_downloads_release_url,ce_downloads_release_description,ce_downloads_release_created,ce_downloads_release_last_updated,ce_downloads_release_available_files,ce_downloads_release_json_document,ce_downloads_uuid,ce_downloads_downloaded_at,ce_downloads_file_type,ce_downloads_content_type,ce_downloads_variant,ce_downloads_available_file,ce_downloads_original_filename,ce_downloads_saved_filename,ce_downloads_file_metadata,ce_downloads_performers,ce_downloads_tags,ce_downloads_hash_oshash,ce_downloads_hash_phash,ce_downloads_hash_sha256
str,str,str,str,date,str,str,str,str,datetime[μs],datetime[μs],str,str,str,datetime[μs],str,str,str,str,str,str,str,list[struct[4]],list[struct[4]],str,str,str
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""019286ea-59ca-74e5-8d7b-014126…",2006-09-15,"""9971""","""Perfect Waves""","""https://www.femjoy.com/post/99…","""""",2024-10-13 17:25:58.356211,2024-10-13 17:25:58.356211,"""[{""file_type"": ""video"", ""conte…","""{""external_id"": ""9971"", ""title…","""019286ea-7d62-75a6-9172-6ececd…",2024-10-13 20:26:07.458313,"""video""","""scene""","""1280x720-mov""","""{""file_type"": ""video"", ""conten…","""femjoy_film_110831_1280x720_h2…","""Femjoy - 2006-09-15 - Perfect …","""{""$type"": ""VideoHashes"", ""dura…","[{""6c293b62-af42-44f9-8bd3-411578247453"",""corinna"",""Corinna"",""/models/corinna""}]",[],"""fb110d143cc5703e""","""c6f8dd8da2885dd0""",
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""019286ea-59ca-74e5-8d7b-014126…",2006-09-15,"""9971""","""Perfect Waves""","""https://www.femjoy.com/post/99…","""""",2024-10-13 17:25:58.356211,2024-10-13 17:25:58.356211,"""[{""file_type"": ""video"", ""conte…","""{""external_id"": ""9971"", ""title…","""019286ea-7d66-7360-9347-9cbc1e…",2024-10-13 20:26:07.462823,"""image""","""cover""","""""","""{""file_type"": ""image"", ""conten…","""7751476084623_nosidebar.jpg""","""Femjoy - 2006-09-15 - Perfect …","""{""$type"": ""ImageFileMetadata"",…","[{""6c293b62-af42-44f9-8bd3-411578247453"",""corinna"",""Corinna"",""/models/corinna""}]",[],,,"""879c13e332b7fe1293186f7470144b…"
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""019286ea-5c5c-70b3-9803-7219f4…",2006-10-10,"""9975""","""Cat Nap""","""https://www.femjoy.com/post/99…","""""",2024-10-13 17:25:59.013226,2024-10-13 17:25:59.013226,"""[{""file_type"": ""video"", ""conte…","""{""external_id"": ""9975"", ""title…","""019286ea-9e0b-727f-9e36-f33583…",2024-10-13 20:26:15.819191,"""video""","""scene""","""1280x720-mov""","""{""file_type"": ""video"", ""conten…","""femjoy_film_110858_1280x720_h2…","""Femjoy - 2006-10-10 - Cat Nap …","""{""$type"": ""VideoHashes"", ""dura…","[{""c483bee0-4f9b-4807-a156-86ae796c106f"",""jenni"",""Jenni"",""/models/jenni""}]",[],"""29ee88652c2448fd""","""a52166517a7c39b6""",
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""019286ea-5c5c-70b3-9803-7219f4…",2006-10-10,"""9975""","""Cat Nap""","""https://www.femjoy.com/post/99…","""""",2024-10-13 17:25:59.013226,2024-10-13 17:25:59.013226,"""[{""file_type"": ""video"", ""conte…","""{""external_id"": ""9975"", ""title…","""019286ea-9e0f-7208-a577-be9392…",2024-10-13 20:26:15.823376,"""image""","""cover""","""""","""{""file_type"": ""image"", ""conten…","""3981476084583_nosidebar.jpg""","""Femjoy - 2006-10-10 - Cat Nap …","""{""$type"": ""ImageFileMetadata"",…","[{""c483bee0-4f9b-4807-a156-86ae796c106f"",""jenni"",""Jenni"",""/models/jenni""}]",[],,,"""289dee0ea1288c07cb0cef94d70146…"
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""019286ea-9e92-7732-8459-86b8de…",2006-11-01,"""9979""","""Open Beauty""","""https://www.femjoy.com/post/99…","""""",2024-10-13 17:26:15.963841,2024-10-13 17:26:15.963841,"""[{""file_type"": ""video"", ""conte…","""{""external_id"": ""9979"", ""title…","""019286ea-d97f-72a1-941f-6de58e…",2024-10-13 20:26:31.039039,"""image""","""cover""","""""","""{""file_type"": ""image"", ""conten…","""1501476084576_nosidebar.jpg""","""Femjoy - 2006-11-01 - Open Bea…","""{""$type"": ""ImageFileMetadata"",…","[{""c483bee0-4f9b-4807-a156-86ae796c106f"",""jenni"",""Jenni"",""/models/jenni""}]",[],,,"""1458e96c0930edeebec9333be38ef6…"
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""01928931-9734-74da-a188-6fd9bd…",2011-09-03,"""3718""","""Because I Miss You""","""https://www.femjoy.com/post/37…","""""",2024-10-14 04:03:01.707957,2024-10-14 04:03:01.707957,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""3718"", ""title…","""01928931-a9a8-7207-817a-c962fc…",2024-10-14 07:03:06.280817,"""image""","""cover""","""""","""{""file_type"": ""image"", ""conten…","""5891476365091_nosidebar.jpg""","""Femjoy - 2011-09-03 - Because …","""{""$type"": ""ImageFileMetadata"",…","[{""79e16c48-b3df-4fe4-9622-3722fe646618"",""danica"",""Danica"",""/models/danica""}]",[],,,"""df372082a60a3752a09ef495b254cf…"
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""01928931-af7e-740b-9240-15c642…",2011-09-04,"""3872""","""If You Like Me""","""https://www.femjoy.com/post/38…","""""",2024-10-14 04:03:07.887213,2024-10-14 04:03:07.887213,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""3872"", ""title…","""01928931-b540-7643-8bd2-6b1114…",2024-10-14 07:03:09.248008,"""image""","""cover""","""""","""{""file_type"": ""image"", ""conten…","""9211476365089_nosidebar.jpg""","""Femjoy - 2011-09-04 - If You L…","""{""$type"": ""ImageFileMetadata"",…","[{""6c293b62-af42-44f9-8bd3-411578247453"",""corinna"",""Corinna"",""/models/corinna""}]",[],,,"""f596a573bf30c182167ff44f4e2303…"
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""01928931-af7e-740b-9240-15c642…",2011-09-04,"""3872""","""If You Like Me""","""https://www.femjoy.com/post/38…","""""",2024-10-14 04:03:07.887213,2024-10-14 04:03:07.887213,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""3872"", ""title…","""01928931-b53d-7298-b118-417f6b…",2024-10-14 07:03:09.245009,"""zip""","""gallery""","""Poster Size Edition 5500px""","""{""file_type"": ""zip"", ""content_…","""zip_l.zip""","""Femjoy - 2011-09-04 - If You L…","""{""$type"": ""GalleryZipFileMetad…","[{""6c293b62-af42-44f9-8bd3-411578247453"",""corinna"",""Corinna"",""/models/corinna""}]",[],,,"""d9f5ea9774fcda44894a88a1db67e0…"
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""01928931-c360-73ae-869d-a0c06b…",2011-07-27,"""3693""","""Like A Dream""","""https://www.femjoy.com/post/36…","""""",2024-10-14 04:03:13.074018,2024-10-14 04:03:13.074018,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""3693"", ""title…","""01928931-d9a9-75f2-8701-c45b59…",2024-10-14 07:03:18.569450,"""zip""","""gallery""","""Poster Size Edition 5500px""","""{""file_type"": ""zip"", ""content_…","""zip_l.zip""","""Femjoy - 2011-07-27 - Like A D…","""{""$type"": ""GalleryZipFileMetad…","[{""79e16c48-b3df-4fe4-9622-3722fe646618"",""danica"",""Danica"",""/models/danica""}]",[],,,"""667f925c2e75bcce862587040dd31b…"


# Matching scenes

In [None]:
# Look up StashApp scenes by the oshash of each downloaded file.
# Image and gallery rows carry no oshash (null), so nulls are dropped first;
# otherwise a None entry would be sent along in the lookup list.
oshashes = downloads["ce_downloads_hash_oshash"].drop_nulls().unique().to_list()
stash_app_scenes = stash_client.find_scenes_by_oshash(oshashes)
stash_app_scenes

In [9]:
# Attach the Culture Extractor download row to each StashApp scene via the
# primary file's oshash. Left join: every StashApp scene found above is kept.
joined_scenes = stash_app_scenes.join(downloads, left_on="stashapp_primary_file_oshash", right_on="ce_downloads_hash_oshash", how="left", coalesce=False)

In [10]:
# Create a list to store scene data
scene_data = []

# One dict per joined scene with the fields passed to the StashDB phash query:
# file name, CE title, phash, duration in seconds and any known StashDB id.
scene_objects = joined_scenes.select(
    basename=pl.col("stashapp_primary_file_basename"),
    title=pl.col("ce_downloads_release_name"),
    phash=pl.col("stashapp_primary_file_phash"),
    duration=pl.col("stashapp_primary_file_duration").dt.total_seconds(),
    stashdb_id=pl.col("stashapp_stashdb_id"),
).to_dicts()

# Query StashDB in slices of `batch_size` scenes and collect one frame per slice.
batch_size = 100

stashdb_scene_batches = []
for i in range(0, len(scene_objects), batch_size):
    batch = scene_objects[i : i + batch_size]
    batch_stashdb_scenes = stashbox_client.query_scenes_by_phash(batch)
    stashdb_scene_batches += [batch_stashdb_scenes]

# Stack the per-batch results into a single frame.
df_stashdb_scenes = pl.concat(stashdb_scene_batches)

In [11]:
# Attach the StashDB match (if any) to each scene via the queried phash.
# NOTE: this reassigns joined_scenes, so re-running the cell on its own joins
# the StashDB columns a second time - re-run from the oshash join instead.
joined_scenes = joined_scenes.join(df_stashdb_scenes, left_on="stashapp_primary_file_phash", right_on="queried_phash", how="left", coalesce=False)

In [None]:
# Sanity check: after the two left joins a StashApp scene should appear once.
# Collect the stashapp_ids that occur on more than one row ...
duplicate_ids = (joined_scenes.group_by("stashapp_id")
                .agg(pl.col("stashapp_id").count().alias("scene_count"))
                .filter(pl.col("scene_count") > 1)
                .get_column("stashapp_id"))

# ... and show every row for those scenes so the ambiguous matches can be inspected.
joined_scenes.filter(pl.col("stashapp_id").is_in(duplicate_ids)).sort("stashapp_id")

In [13]:
# Optional checkpoint: persist the joined frame so the StashDB queries above do not have to be repeated.
# parquet_path = "joined_scenes_with_stashdb_scenes_20250105_1715.parquet"
# joined_scenes.write_parquet(parquet_path)
# joined_scenes = pl.read_parquet(parquet_path)

In [None]:
def calculate_duration_difference(stashapp_duration, stashdb_duration):
    """Build an expression for the relative duration difference in percent.

    The absolute difference of the two duration expressions is divided by the
    larger of the two and multiplied by 100. Rows where either duration is
    null yield null.
    """
    both_known = stashapp_duration.is_not_null() & stashdb_duration.is_not_null()
    absolute_gap = (stashapp_duration - stashdb_duration).abs()
    longer_duration = pl.max_horizontal([stashapp_duration, stashdb_duration])
    percentage = (absolute_gap / longer_duration) * 100
    return pl.when(both_known).then(percentage).otherwise(None)

def calculate_title_similarity(ce_title, stashdb_title):
    """Build an expression for the Levenshtein distance between two title expressions.

    Args:
        ce_title: polars expression for the Culture Extractor title.
        stashdb_title: polars expression for the StashDB title.

    Returns:
        An Int64 expression holding the case-insensitive edit distance, or null
        where either title is null.
    """
    def _distance(pair):
        # map_elements passes each struct row as a dict keyed by field name,
        # so the fields must be read by name (positional row[0] raises KeyError).
        return levenshtein(str(pair["ce_title"]), str(pair["stashdb_title"]))

    return (
        pl.when(ce_title.is_not_null() & stashdb_title.is_not_null())
        .then(
            # Alias both sides so the struct has known, distinct field names
            # regardless of what the input expressions are called.
            pl.struct(ce_title.alias("ce_title"), stashdb_title.alias("stashdb_title"))
            .map_elements(_distance, return_dtype=pl.Int64)
        )
        .otherwise(None)
    )

def get_date_difference_days(ce_date, stashdb_date):
    """Build an expression for the absolute difference between two dates in whole days.

    Both inputs are cast to Datetime before subtracting. Rows where either
    date is null yield null.
    """
    both_known = ce_date.is_not_null() & stashdb_date.is_not_null()
    elapsed = ce_date.cast(pl.Datetime) - stashdb_date.cast(pl.Datetime)
    day_gap = elapsed.dt.total_days().abs()
    return pl.when(both_known).then(day_gap).otherwise(None)

# Comparison metrics between the local data (StashApp file / Culture Extractor
# release) and the StashDB scene matched by phash.
df_verification = joined_scenes.with_columns(
    duration_diff_pct=calculate_duration_difference(
        pl.col("stashapp_primary_file_duration"),
        pl.col("duration"),
    ),
    title_levenshtein=pl.struct(["ce_downloads_release_name", "title"]).map_elements(
        lambda x: levenshtein(x["ce_downloads_release_name"], x["title"]),
        return_dtype=pl.Int64,
    ),
    date_diff_days=get_date_difference_days(
        pl.col("ce_downloads_release_date"),
        pl.col("date"),
    ),
)

# Flag rows whose metrics exceed the review thresholds:
# more than 5 % duration difference, more than 5 title edits, more than 7 days apart.
df_verification = df_verification.with_columns(
    duration_warning=pl.col("duration_diff_pct") > 5,
    title_warning=pl.col("title_levenshtein") > 5,
    date_warning=pl.col("date_diff_days") > 7,
)

# Keep only the flagged rows, with the compared values side by side.
df_verification_warnings = df_verification.filter(
    pl.col("date_warning") | pl.col("duration_warning") | pl.col("title_warning")
).select(
    "stashapp_id",
    "id",
    "stashapp_title",
    "ce_downloads_release_name",
    "title",
    "title_levenshtein",
    "ce_downloads_release_date",
    "date",
    "date_diff_days",
    "stashapp_primary_file_duration",
    "duration",
    "duration_diff_pct",
)
df_verification_warnings

In [None]:
# Distinct Culture Extractor performers (uuid + name) appearing on the matched scenes.
joined_scenes_ce_unique_performers = (
    joined_scenes
    .select("ce_downloads_performers")
    .explode("ce_downloads_performers")  # one row per performer struct
    .select(
        performer_uuid=pl.col("ce_downloads_performers").struct.field("uuid"),
        performer_name=pl.col("ce_downloads_performers").struct.field("name"),
    )
    .unique()
    .sort("performer_name")
)
joined_scenes_ce_unique_performers

In [None]:
# All StashApp performers, plus the value of this site's Culture Extractor
# custom field ("CultureExtractor.<site short name>") when the performer has one.
all_stashapp_performers = stash_client.get_performers().with_columns(
    ce_custom_field_value=(
        pl.col("stashapp_custom_fields")
        # Keep the value for the matching key, null for every other custom field ...
        .list.eval(
            pl.when(pl.element().struct.field("key") == f"CultureExtractor.{selected_studio['ce_sites_short_name']}")
            .then(pl.element().struct.field("value"))
            .otherwise(None)
        )
        # ... drop the nulls and take what is left (null if nothing matched).
        .list.eval(pl.element().filter(pl.element().is_not_null()))
        .list.first()
    )
)
all_stashapp_performers

In [None]:
# Check for Culture Extractor performers that have not been matched to a StashApp performer
unmatched_performers_df = joined_scenes_ce_unique_performers.join(
    all_stashapp_performers.filter(pl.col("ce_custom_field_value").is_not_null()), 
    left_on="performer_uuid", 
    right_on="ce_custom_field_value", 
    how="left",  # left join keeps every CE performer, matched or not
    coalesce=False
).filter(
    pl.col("ce_custom_field_value").is_null()  # no StashApp performer carries this CE uuid
)
unmatched_performers_df

In [None]:
from libraries.performer_matcher import PerformerMatcher

# Create matcher instance
matcher = PerformerMatcher(all_stashapp_performers)

# Collected match records, one dict per (scene, performer match)
all_matches = []

# The matcher is called once per scene: each row's three performer lists
# (Culture Extractor, StashApp, StashDB) are wrapped in a one-row DataFrame
# so they can be passed as columns.
for row in joined_scenes.iter_rows(named=True):
    data_frame = pl.DataFrame([{
        'ce_downloads_performers': row['ce_downloads_performers'],
        'stashapp_performers': row['stashapp_performers'],
        'performers': row['performers']
    }])
    
    matches = matcher.match_performers(
        data_frame['ce_downloads_performers'],
        data_frame['stashapp_performers'],
        data_frame['performers']
    )
    
    # Add scene context to matches
    for match in matches:
        all_matches.append({
            'scene_id': row['stashapp_id'],
            'scene_title': row['stashapp_title'],
            'ce_uuid': match.ce_uuid,
            'ce_name': match.ce_name,
            'stashapp_id': match.stashapp_id,
            'stashapp_name': match.stashapp_name,
            'stashdb_uuid': match.stashdb_uuid,
            'stashdb_name': match.stashdb_name,
            'confidence': match.confidence,
            'reason': match.reason
        })

# Convert matches to DataFrame for analysis
# NOTE(review): with no matches at all the frame has no columns and the sort
# below would fail - presumably never the case for a real site; confirm.
joined_performers_matches_df = pl.DataFrame(all_matches)
joined_performers_matches_df = joined_performers_matches_df.sort("stashapp_name")
joined_performers_matches_df

In [None]:
# Matches whose stashapp_id is -1; the next cell creates a StashApp performer for each of them.
# NOTE(review): -1 presumably is PerformerMatcher's marker for "no StashApp performer yet" - confirm.
new_performers = joined_performers_matches_df.filter(pl.col("stashapp_id").eq(-1))
new_performers

In [None]:
# WRITES TO STASHAPP: create one performer per distinct (ce_uuid, stashdb_uuid, stashdb_name),
# named after the StashDB performer, linked to StashDB via stash_ids and to
# Culture Extractor via the site-specific custom field.
# NOTE(review): rows without a StashDB match would pass a null name / stash_id here -
# verify new_performers before running.
for row in new_performers.select(pl.col(["ce_uuid", "stashdb_uuid", "stashdb_name"])).unique().iter_rows(named=True):
    stash_raw_client.create_performer({
        "name": row["stashdb_name"],
        "stash_ids": [
            {
                "endpoint": "https://stashdb.org/graphql",
                "stash_id": row["stashdb_uuid"]
            }
        ],
        "custom_fields": {
            "CultureExtractor." + selected_studio["ce_sites_short_name"]: row["ce_uuid"]
        }
    })
    print(f"Created performer {row['stashdb_name']}")

In [None]:
# Matches that already point at a StashApp performer (stashapp_id other than -1).
existing_performers = joined_performers_matches_df.filter(pl.col("stashapp_id").ne(-1))
existing_performers

In [None]:
# WRITES TO STASHAPP: store the Culture Extractor performer uuid in the
# site-specific custom field of every already-existing matched performer.
custom_field_name = "CultureExtractor." + selected_studio["ce_sites_short_name"]
for row in existing_performers.select(pl.col(["ce_uuid", "stashapp_id", "stashapp_name"])).unique().iter_rows(named=True):
    stash_client.update_performer_custom_fields(row["stashapp_id"], {custom_field_name: row["ce_uuid"]})
    # Printed after the update call has returned.
    print(f"Setting custom field {custom_field_name} for {row['stashapp_name']} ({row['stashapp_id']}) to {row['ce_uuid']}")

In [None]:
# Re-read the performers (the cells above created / updated some) and build the
# lookup Culture Extractor performer uuid -> StashApp performer id / name.
# Only performers that carry this site's CultureExtractor custom field are kept.
ce_performer_mapping = stash_client.get_performers().with_columns([
    # Value of the custom field whose key matches this site, or null if absent
    pl.col("stashapp_custom_fields").list.eval(
        pl.element().struct.field("value").filter(
            pl.element().struct.field("key") == "CultureExtractor." + selected_studio["ce_sites_short_name"]
        )
    ).list.first().alias("ce_custom_field_value")
]).filter(
    pl.col("ce_custom_field_value").is_not_null()
).select(
    pl.col("ce_custom_field_value").alias("ce_performer_uuid"),
    pl.col("stashapp_id").alias("stashapp_id"),
    pl.col("stashapp_name").alias("stashapp_name")
).sort(by=["stashapp_name"])
ce_performer_mapping

In [None]:
# Distinct StashDB performer ids referenced by the matched scenes
# (one row per id, in a column still named "performers").
unique_stashdb_performer_ids = (
    joined_scenes
    .select(
        pl.col("performers").list.eval(
            pl.element().struct.field("performer").struct.field("id")
        )
    )
    .explode("performers")
    .unique()
)

# Lookup StashDB performer id -> StashApp performer id / name, limited to the
# performers that actually occur on the matched scenes.
stashdb_performer_mapping = (
    stash_client.get_performers()
    .with_columns(
        stashdb_id=(
            pl.col("stashapp_stash_ids")
            # Keep the stash_id registered for the StashDB endpoint, null for other endpoints ...
            .list.eval(
                pl.when(pl.element().struct.field("endpoint") == "https://stashdb.org/graphql")
                .then(pl.element().struct.field("stash_id"))
                .otherwise(None)
            )
            # ... drop the nulls and take what is left (null if there is none).
            .list.eval(pl.element().filter(pl.element().is_not_null()))
            .list.first()
        )
    )
    .filter(pl.col("stashdb_id").is_not_null())
    .select("stashdb_id", "stashapp_id", "stashapp_name")
    .join(
        unique_stashdb_performer_ids,
        how="inner",
        left_on="stashdb_id",
        right_on="performers",
    )
    .sort("stashapp_name")
)
stashdb_performer_mapping

In [None]:
# Performers present in both lookups (linked to Culture Extractor and to StashDB), shown side by side.
ce_performer_mapping.join(stashdb_performer_mapping, on="stashapp_id", how="inner", coalesce=False).sort("stashapp_name")

In [42]:
# Performers that have the Culture Extractor link but are missing from the
# StashDB lookup (anti join = rows of the left frame without a partner).
names_only_in_ce = ce_performer_mapping.join(
    stashdb_performer_mapping,
    how="anti",
    on="stashapp_id",
)

# The reverse: performers in the StashDB lookup without a Culture Extractor link.
names_only_in_stashdb = stashdb_performer_mapping.join(
    ce_performer_mapping,
    how="anti",
    on="stashapp_id",
)

# Only report a side when it actually has rows.
if names_only_in_ce.height > 0:
    print("Names only in Culture Extractor:")
    print(names_only_in_ce)
if names_only_in_stashdb.height > 0:
    print("\nNames only in StashDB:")
    print(names_only_in_stashdb)

In [None]:
# Overview of which file type / content type / variant combinations exist among
# the downloads that belong to the matched releases.
(
    downloads
    .filter(
        pl.col("ce_downloads_release_uuid").is_in(
            joined_scenes["ce_downloads_release_uuid"].unique().to_list()
        )
    )
    .select("ce_downloads_file_type", "ce_downloads_content_type", "ce_downloads_variant")
    .unique()
    .sort("ce_downloads_file_type", "ce_downloads_content_type", "ce_downloads_variant")
)

In [None]:
import base64

def create_update_dataframe(joined_scenes, downloads, all_stashapp_performers, all_tags, stashapp_studio_id):
    """Assemble one frame with everything needed to update the matched StashApp scenes.

    Args:
        joined_scenes: Frame of StashApp scenes joined with Culture Extractor
            downloads and StashDB scenes (columns as selected below).
        downloads: Culture Extractor downloads frame; used to look up scene
            images and gallery hashes per release.
        all_stashapp_performers: Not used in the body. Performer lookups read
            the notebook-level frames ``ce_performer_mapping`` and
            ``stashdb_performer_mapping`` instead, so those must exist before
            this function is called.
        all_tags: Iterable of StashApp tag dicts with "name" and "id" keys.
        stashapp_studio_id: Studio id written to every row as ``studio_id``.

    Returns:
        The selected scene columns plus ``performer_ids`` / ``performer_names``,
        ``tag_ids``, ``scene_image_filename`` and ``gallery_hash``.

    NOTE(review): images and galleries are attached with left joins on the
    release uuid; a release with several matching image or gallery downloads
    would presumably produce several rows for the same scene - confirm.
    """
    # Get all scene data ready for updates
    updates_df = joined_scenes.select([
        pl.col("ce_downloads_release_uuid").alias("ce_release_uuid"),
        pl.col("stashapp_id").alias("scene_id"),
        pl.col("stashapp_primary_file_basename").alias("primary_file_basename"),
        pl.col("ce_downloads_release_date").alias("date"),
        pl.col("ce_downloads_release_name").alias("title"),
        pl.col("ce_downloads_release_short_name").alias("code"),
        pl.col("ce_downloads_release_description").alias("details"),
        pl.lit(stashapp_studio_id).alias("studio_id"),
        pl.col("ce_downloads_release_url").alias("url"),
        # Kept under its original name as well: it is the join key for images/galleries below
        pl.col("ce_downloads_release_uuid"),
        pl.col("id").alias("stashdb_id"),
        pl.col("ce_downloads_performers"),
        pl.col("performers"),
        pl.col("tags").alias("stashdb_tags")
    ])

    # Map performers - now with unique values
    updates_df = updates_df.with_columns([
        # Get Culture Extractor UUIDs
        pl.col("ce_downloads_performers").list.eval(
            pl.element().struct.field("uuid")
        ).list.unique().alias("ce_performer_uuids"),
        
        # Get StashDB IDs
        pl.col("performers").list.eval(
            pl.element().struct.field("performer").struct.field("id")
        ).list.unique().alias("stashdb_performer_ids")
    ])

    # Translate the Culture Extractor uuids to StashApp performer ids and names
    # via the notebook-level ce_performer_mapping; rows without CE performers get [].
    updates_df = updates_df.with_columns([
        pl.when(pl.col("ce_performer_uuids").is_not_null())
        .then(
            pl.col("ce_performer_uuids").map_elements(
                lambda uuids: ce_performer_mapping.filter(
                    pl.col("ce_performer_uuid").is_in(uuids)
                ).get_column("stashapp_id").unique().to_list(),
                return_dtype=pl.List(pl.Int64)
            )
        )
        .otherwise(pl.Series([[]]))
        .alias("ce_performer_stashapp_ids"),

        pl.when(pl.col("ce_performer_uuids").is_not_null())
        .then(
            pl.col("ce_performer_uuids").map_elements(
                lambda uuids: ce_performer_mapping.filter(
                    pl.col("ce_performer_uuid").is_in(uuids)
                ).get_column("stashapp_name").unique().to_list(),
                return_dtype=pl.List(pl.Utf8)
            )
        )
        .otherwise(pl.Series([[]]))
        .alias("ce_performer_stashapp_names")
    ])
    
    # Same translation for the StashDB performer ids, via the notebook-level
    # stashdb_performer_mapping.
    updates_df = updates_df.with_columns([
        # Guarded on "performers" here, on "stashdb_performer_ids" for the names below
        pl.when(pl.col("performers").is_not_null())
        .then(
            pl.col("stashdb_performer_ids").map_elements(
                lambda uuids: stashdb_performer_mapping.filter(
                    pl.col("stashdb_id").is_in(uuids)
                ).get_column("stashapp_id").unique().to_list(),
                return_dtype=pl.List(pl.Int64)
            )
        )
        .otherwise(pl.Series([[]]))
        .alias("stashdb_performer_stashapp_ids"),

        pl.when(pl.col("stashdb_performer_ids").is_not_null())
        .then(
            pl.col("stashdb_performer_ids").map_elements(
                lambda uuids: stashdb_performer_mapping.filter(
                    pl.col("stashdb_id").is_in(uuids)
                ).get_column("stashapp_name").unique().to_list(),
                return_dtype=pl.List(pl.Utf8)
            )
        )
        .otherwise(pl.Series([[]]))
        .alias("stashdb_performer_stashapp_names")
    ])

    # Combine performer IDs with unique values
    updates_df = updates_df.with_columns([
        pl.concat_list([
            pl.col("ce_performer_stashapp_ids"),
            pl.col("stashdb_performer_stashapp_ids")
        ]).list.unique().alias("performer_ids"),
        
        pl.concat_list([
            pl.col("ce_performer_stashapp_names"),
            pl.col("stashdb_performer_stashapp_names")
        ]).list.unique().alias("performer_names")
    ])

    # Map tags: StashDB tags are matched to StashApp tags by exact name
    tag_mapping = pl.DataFrame({
        "stashdb_name": [tag["name"] for tag in all_tags],
        "stashapp_id": [tag["id"] for tag in all_tags]
    })

    updates_df = updates_df.with_columns([
        pl.when(pl.col("stashdb_tags").is_not_null())
        .then(
            pl.col("stashdb_tags").map_elements(
                lambda tags: tag_mapping.filter(
                    pl.col("stashdb_name").is_in([t["name"] for t in tags])
                ).get_column("stashapp_id").to_list(),
                return_dtype=pl.List(pl.Utf8)
            )
        )
        .otherwise(pl.Series([[]]))
        .alias("tag_ids")
    ])

    # Get scene images
    # NOTE(review): only "poster" / "scene" image downloads are considered; the
    # Femjoy downloads displayed earlier in this notebook use content type
    # "cover" for images - confirm whether "cover" should be included.
    scene_images = downloads.filter(
        pl.col("ce_downloads_file_type") == "image",
        pl.col("ce_downloads_content_type").is_in(["poster", "scene"])
    ).select([
        pl.col("ce_downloads_release_uuid"),
        pl.col("ce_downloads_saved_filename").alias("scene_image_filename")
    ])

    # Get gallery info: gallery downloads whose variant is "Large" or empty,
    # identified by their sha256 hash
    galleries = downloads.filter(
        (pl.col("ce_downloads_content_type") == "gallery") &
        (pl.col("ce_downloads_variant").is_in(["Large", ""]))
    ).select([
        pl.col("ce_downloads_release_uuid"),
        pl.col("ce_downloads_hash_sha256").alias("gallery_hash")
    ])

    # Join images and galleries (left joins: scenes without either keep nulls)
    updates_df = updates_df.join(
        scene_images,
        on="ce_downloads_release_uuid",
        how="left"
    ).join(
        galleries,
        on="ce_downloads_release_uuid",
        how="left"
    )

    return updates_df

def _load_cover_image_data_uri(image_path):
    """Read ``image_path`` and return it as a ``data:image/jpeg;base64,...`` URI.

    Returns None (after printing a warning) when the file cannot be read, so
    callers never receive a data URI without a real payload.
    """
    try:
        # The context manager closes the handle even if reading fails.
        with open(image_path, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
    except OSError as e:
        print(f"Could not load scene image {image_path}: {e}")
        return None
    return f"data:image/jpeg;base64,{encoded_image}"


def _dedupe_urls(urls):
    """Return ``urls`` without empty values or duplicates, keeping first-seen order."""
    return list(dict.fromkeys(url for url in urls if url))


def generate_update_inputs(updates_df, stash_raw_client, metadata_dir=None):
    """Build one StashApp update record per row of ``updates_df``.

    For every row the current scene (and its gallery, if any) is re-fetched
    through ``stash_raw_client`` and merged with the new metadata in the row:
    tags, performers, URLs and stash IDs are unions of existing and new values.

    Args:
        updates_df: Frame exposing ``iter_rows(named=True)`` with the columns
            read below (``scene_id``, ``ce_downloads_release_uuid``,
            ``scene_image_filename``, ``gallery_hash``, ``tag_ids``,
            ``performer_ids``, ``url``, ``stashdb_id``, ...).
        stash_raw_client: Client providing ``find_scene``, ``find_galleries``
            and ``find_gallery``.
        metadata_dir: Directory containing one sub-directory per release UUID
            with the downloaded scene images. Defaults to the original
            ``F:\\Ripping\\<site name>\\Metadata`` location derived from the
            notebook-level ``selected_studio``.

    Returns:
        A polars DataFrame with one row per input row. ``cover_image`` is None
        when no image could be loaded, and ``gallery_urls`` is None when the
        scene has no gallery.
    """
    if metadata_dir is None:
        metadata_dir = "F:\\Ripping\\" + selected_studio["ce_sites_name"] + "\\Metadata"

    updates = []

    for row in updates_df.iter_rows(named=True):
        # Get current scene data
        refreshed_scene = stash_raw_client.find_scene(row["scene_id"])
        existing_scene_galleries = refreshed_scene.get("galleries") or []
        existing_scene_gallery_id = existing_scene_galleries[0]["id"] if existing_scene_galleries else None

        # Load scene image; rows without an image file simply get no cover.
        cover_image = None
        if row["ce_downloads_release_uuid"] and row["scene_image_filename"]:
            cover_image = _load_cover_image_data_uri(
                os.path.join(
                    metadata_dir,
                    row["ce_downloads_release_uuid"],
                    row["scene_image_filename"],
                )
            )

        # Prefer the gallery already attached to the scene; otherwise accept a
        # hash search result only when it is unambiguous (exactly one hit).
        gallery_id = existing_scene_gallery_id
        if not gallery_id and row["gallery_hash"]:
            found_galleries = stash_raw_client.find_galleries(q=row["gallery_hash"])
            if len(found_galleries) == 1:
                gallery_id = found_galleries[0]["id"]

        refreshed_gallery = stash_raw_client.find_gallery(gallery_id) if gallery_id else None
        if not refreshed_gallery:
            # A gallery that cannot be fetched cannot be updated either.
            gallery_id = None
        gallery = refreshed_gallery or {}

        # Handle potentially null values
        existing_scene_tag_ids = sorted(tag["id"] for tag in refreshed_scene.get("tags") or [])
        existing_gallery_tag_ids = sorted(tag["id"] for tag in gallery.get("tags") or [])
        new_tag_ids = sorted(row["tag_ids"]) if row["tag_ids"] is not None else []

        existing_performer_ids = [int(performer["id"]) for performer in refreshed_scene.get("performers") or []]
        new_performer_ids = list(row["performer_ids"]) if row["performer_ids"] is not None else []

        existing_urls = refreshed_scene.get("urls") or []
        existing_gallery_urls = gallery.get("urls") or []
        existing_stash_ids = refreshed_scene.get("stash_ids") or []

        # De-duplicate so that re-running the import does not pile up URLs.
        scene_urls = _dedupe_urls(existing_urls + [row["url"]])

        gallery_urls = None
        if gallery_id:
            new_gallery_urls = [row["url"]]
            if row["ce_downloads_release_uuid"]:
                new_gallery_urls.append(
                    f"https://culture.extractor/galleries/{row['ce_downloads_release_uuid']}"
                )
            gallery_urls = _dedupe_urls(existing_gallery_urls + new_gallery_urls)

        new_stash_ids = []
        if row.get("stashdb_id"):
            new_stash_ids.append({
                "endpoint": "https://stashdb.org/graphql",
                "stash_id": row["stashdb_id"]
            })
        if row.get("ce_downloads_release_uuid"):
            new_stash_ids.append({
                "endpoint": "https://culture.extractor/graphql",
                "stash_id": row["ce_downloads_release_uuid"]
            })

        # Keyed by (endpoint, stash_id): a new entry replaces an existing one
        # with the same key instead of being added twice.
        scene_stash_ids = list({
            (stash_id["endpoint"], stash_id["stash_id"]): stash_id
            for stash_id in existing_stash_ids + new_stash_ids
        }.values())

        update = {
            "ce_release_uuid": row["ce_release_uuid"],
            "scene_id": row["scene_id"],
            "primary_file_basename": row["primary_file_basename"],
            "existing_scene_gallery_id": existing_scene_gallery_id,
            "gallery_id": gallery_id,
            "existing_scene_date": refreshed_scene.get("date", None),
            "existing_gallery_date": gallery.get("date", None),
            "date": row["date"].strftime("%Y-%m-%d") if row["date"] else None,
            "existing_scene_title": refreshed_scene.get("title", None),
            "existing_gallery_title": gallery.get("title", None),
            "title": row["title"],
            "existing_scene_code": refreshed_scene.get("code", None),
            "existing_gallery_code": gallery.get("code", None),
            "code": row["code"],
            "existing_scene_details": refreshed_scene.get("details", None),
            "existing_gallery_details": gallery.get("details", None),
            "details": row["details"],
            "existing_scene_studio_id": (refreshed_scene.get("studio") or {}).get("id"),
            "existing_gallery_studio_id": (gallery.get("studio") or {}).get("id"),
            "studio_id": row["studio_id"],
            "existing_scene_performers": refreshed_scene.get("performers", []),
            "existing_gallery_performers": gallery.get("performers", []),
            "performer_ids": list(set(existing_performer_ids + new_performer_ids)),
            "existing_scene_tags": existing_scene_tag_ids,
            "existing_gallery_tags": existing_gallery_tag_ids,
            "scene_tag_ids": sorted(set(existing_scene_tag_ids + new_tag_ids)),
            "gallery_tag_ids": sorted(set(existing_gallery_tag_ids + new_tag_ids)) if refreshed_gallery else [],
            "existing_scene_urls": refreshed_scene.get("urls", []),
            "scene_urls": scene_urls,
            "existing_gallery_urls": gallery.get("urls", []),
            "gallery_urls": gallery_urls,
            # None (not "data:image/jpeg;base64,None") when no image was loaded.
            "cover_image": cover_image,
            "scene_stash_ids": scene_stash_ids
        }
        updates.append(update)

    return pl.DataFrame(updates)

# Usage: derive the proposed metadata per scene, merge it with the current
# StashApp state, and order the result by date for review.
updates_df = create_update_dataframe(
    joined_scenes, downloads, all_stashapp_performers, all_tags, selected_studio["stash_studios_id"]
)

update_inputs_df = generate_update_inputs(updates_df, stash_raw_client).sort("date")

# Review updates before applying
print("Updates to be applied:")
print(update_inputs_df)

In [47]:
# Apply updates if everything looks good.
# Each scene is updated first; its gallery (if any) is only touched when the
# scene update succeeded. Failures are reported and the loop moves on.
for update in update_inputs_df.iter_rows(named=True):
    # Update scene
    scene_input = {
        "id": update["scene_id"],
        "date": update["date"],
        "title": update["title"],
        "code": update["code"],
        "details": update["details"],
        "studio_id": update["studio_id"],
        "performer_ids": update["performer_ids"],
        "tag_ids": update["scene_tag_ids"],
        "urls": update["scene_urls"],
        "stash_ids": update["scene_stash_ids"]
    }

    # Only send a cover when an image was actually loaded: skip a missing value
    # and a data URI whose payload is the literal text "None".
    cover_image = update["cover_image"]
    if cover_image and not cover_image.endswith(";base64,None"):
        scene_input["cover_image"] = cover_image

    if update["gallery_id"]:
        scene_input["gallery_ids"] = [update["gallery_id"]]

    try:
        stash_raw_client.update_scene(scene_input)
    except Exception as e:
        print(f"Error updating scene for {update['scene_id']}: {e}")
        continue

    # Update gallery if exists
    if not update["gallery_id"]:
        continue

    gallery_input = {
        "id": update["gallery_id"],
        "date": update["date"],
        "title": update["title"],
        "code": update["code"],
        "details": update["details"],
        "studio_id": update["studio_id"],
        "performer_ids": update["performer_ids"],
        "tag_ids": update["gallery_tag_ids"],
        # Drop empty entries (e.g. a release without a URL).
        "urls": [url for url in (update["gallery_urls"] or []) if url]
    }
    try:
        stash_raw_client.update_gallery(gallery_input)
    except Exception as e:
        print(f"Error updating gallery for {update['scene_id']}: {e}")


# Matching standalone galleries

In [8]:
# Look up StashApp galleries by the SHA-256 of the downloaded files.
# Nulls are dropped first: unique() would otherwise keep one null entry, and a
# missing hash cannot identify a gallery.
sha256_hashes = downloads["ce_downloads_hash_sha256"].drop_nulls().unique().to_list()
stash_app_galleries = stash_client.find_galleries_by_sha256(sha256_hashes)
stash_app_galleries

stashapp_id,stashapp_title,stashapp_details,stashapp_date,stashapp_code,stashapp_urls,stashapp_photographer,stashapp_created_at,stashapp_updated_at,stashapp_performers,stashapp_studio,stashapp_files,stashapp_primary_file_path,stashapp_primary_file_basename,stashapp_primary_file_md5,stashapp_primary_file_sha256,stashapp_primary_file_xxhash,stashapp_tags,stashapp_organized,stashapp_stash_ids,stashapp_ce_id
i64,str,str,date,str,list[str],struct[3],datetime[μs],datetime[μs],list[struct[7]],struct[4],list[struct[5]],str,str,str,str,str,list[struct[2]],bool,list[struct[3]],str
6735,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 07:17:17,2024-12-02 07:17:20,[],,"[{682157,""W:\Culture\Photos\Femjoy - 2022-01-22 - Glamour - Poster Size Edition 5500px - 019287bc-18f7-7730-9eee-93eba514ae77.zip"",""Femjoy - 2022-01-22 - Glamour - Poster Size Edition 5500px - 019287bc-18f7-7730-9eee-93eba514ae77.zip"",159285210,[{""md5"",""07a29c822db6e00818fd5476b33e48a3""}, {""sha256"",""7dba16274793cfde84dd403df8d0dd34a8cf2e0424a01ce65c887825365af991""}, {""xxhash"",""d40c10377afaba8f""}]}]","""W:\Culture\Photos\Femjoy - 202…","""Femjoy - 2022-01-22 - Glamour …","""07a29c822db6e00818fd5476b33e48…","""7dba16274793cfde84dd403df8d0dd…","""d40c10377afaba8f""",[],false,,
6525,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 07:06:48,2024-12-02 07:06:50,[],,"[{661651,""W:\Culture\Photos\Femjoy - 2015-03-14 - Horny - Poster Size Edition 5500px - 0192875e-ae6e-733a-8b48-93d01301a73d.zip"",""Femjoy - 2015-03-14 - Horny - Poster Size Edition 5500px - 0192875e-ae6e-733a-8b48-93d01301a73d.zip"",199729102,[{""md5"",""3cd2027f5678713a26608a40391921b5""}, {""sha256"",""7f31d2abb843ffed9fd39424ca8a5bd7f5c893206bec9d293ff446e416c0152a""}, {""xxhash"",""2c95264d36b376fc""}]}]","""W:\Culture\Photos\Femjoy - 201…","""Femjoy - 2015-03-14 - Horny - …","""3cd2027f5678713a26608a40391921…","""7f31d2abb843ffed9fd39424ca8a5b…","""2c95264d36b376fc""",[],false,,
6593,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 07:09:21,2024-12-02 07:09:22,[],,"[{668477,""W:\Culture\Photos\Femjoy - 2016-04-24 - For You - Poster Size Edition 5500px - 01928773-4e24-75e7-8972-98177155685c.zip"",""Femjoy - 2016-04-24 - For You - Poster Size Edition 5500px - 01928773-4e24-75e7-8972-98177155685c.zip"",76902000,[{""md5"",""72801e9ce2300b4e5605de1b745c91e8""}, {""sha256"",""196a6ef64a2b18d2de6be9110a69b0c9f4edf149910c6c634c04debbf0d063cf""}, {""xxhash"",""faa4383ebb8858d9""}]}]","""W:\Culture\Photos\Femjoy - 201…","""Femjoy - 2016-04-24 - For You …","""72801e9ce2300b4e5605de1b745c91…","""196a6ef64a2b18d2de6be9110a69b0…","""faa4383ebb8858d9""",[],false,,
6298,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 07:01:55,2024-12-02 07:01:56,[],,"[{643983,""W:\Culture\Photos\Femjoy - 2012-08-21 - Starlight - Poster Size Edition 5500px - 019288f7-2e14-77dd-8a08-d955b93072a6.zip"",""Femjoy - 2012-08-21 - Starlight - Poster Size Edition 5500px - 019288f7-2e14-77dd-8a08-d955b93072a6.zip"",35123679,[{""md5"",""b639d858777bd5775af653699e4028cf""}, {""sha256"",""72b002d8d87b31d6c5b4f30494505905f725ce9e28640e7a13e826b2c06a1221""}, {""xxhash"",""4afdfaca7ac28a42""}]}]","""W:\Culture\Photos\Femjoy - 201…","""Femjoy - 2012-08-21 - Starligh…","""b639d858777bd5775af653699e4028…","""72b002d8d87b31d6c5b4f304945059…","""4afdfaca7ac28a42""",[],false,,
6422,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 07:03:58,2024-12-02 07:04:00,[],,"[{652202,""W:\Culture\Photos\Femjoy - 2014-01-08 - I Can Be Your Friend - Poster Size Edition 5500px - 01928747-9fed-73e8-8751-517f353aa309.zip"",""Femjoy - 2014-01-08 - I Can Be Your Friend - Poster Size Edition 5500px - 01928747-9fed-73e8-8751-517f353aa309.zip"",88311071,[{""md5"",""40a04443e1a6466332bc4bbb5b20ac35""}, {""sha256"",""0e5c6bbb30db0bc7f8c6dec8e936cf9143b0dc1b0be5c4122a6ce509224e3291""}, {""xxhash"",""e8f78111835ddc67""}]}]","""W:\Culture\Photos\Femjoy - 201…","""Femjoy - 2014-01-08 - I Can Be…","""40a04443e1a6466332bc4bbb5b20ac…","""0e5c6bbb30db0bc7f8c6dec8e936cf…","""e8f78111835ddc67""",[],false,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
5935,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 06:57:07,2024-12-02 06:57:07,[],,"[{621751,""W:\Culture\Photos\Femjoy - 2008-10-14 - Azzurra - Poster Size Edition 5500px - 019288ec-5eb8-7634-ab66-b3ee28d4b1d1.zip"",""Femjoy - 2008-10-14 - Azzurra - Poster Size Edition 5500px - 019288ec-5eb8-7634-ab66-b3ee28d4b1d1.zip"",139149945,[{""md5"",""7e3b845be103209c774bc69542c5c948""}, {""sha256"",""2e595672ed8654cae3923335770b83a9480a85392f27f84d04afe79fd29db09e""}, {""xxhash"",""12506f57a8f4e7a4""}]}]","""W:\Culture\Photos\Femjoy - 200…","""Femjoy - 2008-10-14 - Azzurra …","""7e3b845be103209c774bc69542c5c9…","""2e595672ed8654cae3923335770b83…","""12506f57a8f4e7a4""",[],false,,
6160,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 07:00:05,2024-12-02 07:00:05,[],,"[{634790,""W:\Culture\Photos\Femjoy - 2011-07-17 - Pure - Poster Size Edition 5500px - 01928731-0b60-75d1-9d02-7c14208995c6.zip"",""Femjoy - 2011-07-17 - Pure - Poster Size Edition 5500px - 01928731-0b60-75d1-9d02-7c14208995c6.zip"",28094327,[{""md5"",""522a946817f05ff1e7ce60b5bd1aaa99""}, {""sha256"",""0ad97182fbb17ae8771a8462158b3649c819b98b142c1fc87c17597d9c8e9c3d""}, {""xxhash"",""b2c143858aa76f18""}]}]","""W:\Culture\Photos\Femjoy - 201…","""Femjoy - 2011-07-17 - Pure - P…","""522a946817f05ff1e7ce60b5bd1aaa…","""0ad97182fbb17ae8771a8462158b36…","""b2c143858aa76f18""",[],false,,
6058,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 06:58:41,2024-12-02 06:58:41,[],,"[{628543,""W:\Culture\Photos\Femjoy - 2010-10-06 - Back Home - Poster Size Edition 5500px - 01928725-2f5c-7274-9dab-966cfd3badd9.zip"",""Femjoy - 2010-10-06 - Back Home - Poster Size Edition 5500px - 01928725-2f5c-7274-9dab-966cfd3badd9.zip"",29012190,[{""md5"",""2f8225ee45b7f3f661fa0c52696d3302""}, {""sha256"",""3b64fe1e179bae031a21125321dfb4738cc81851ceaa4532c8108efd3c74ef46""}, {""xxhash"",""79baed15baf313ca""}]}]","""W:\Culture\Photos\Femjoy - 201…","""Femjoy - 2010-10-06 - Back Hom…","""2f8225ee45b7f3f661fa0c52696d33…","""3b64fe1e179bae031a21125321dfb4…","""79baed15baf313ca""",[],false,,
5871,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 06:56:20,2024-12-02 06:56:21,[],,"[{618518,""W:\Culture\Photos\Femjoy - 2007-09-27 - Ballroom - Poster Size Edition 5500px - 0192870f-16f1-73db-bd4c-0877137f9379.zip"",""Femjoy - 2007-09-27 - Ballroom - Poster Size Edition 5500px - 0192870f-16f1-73db-bd4c-0877137f9379.zip"",22425705,[{""md5"",""6e31be13740ca1f54db3afc1e5418bda""}, {""sha256"",""2a658969a3a501f97d62e84304c348fc90ae50290fe1d042e8ee2c55716d531d""}, {""xxhash"",""14001fd58cb35253""}]}]","""W:\Culture\Photos\Femjoy - 200…","""Femjoy - 2007-09-27 - Ballroom…","""6e31be13740ca1f54db3afc1e5418b…","""2a658969a3a501f97d62e84304c348…","""14001fd58cb35253""",[],false,,


In [12]:
# Inner-join Culture Extractor downloads to StashApp galleries on the file's
# SHA-256; coalesce=False keeps both key columns in the result.
joined_galleries = downloads.join(
    stash_app_galleries,
    how="inner",
    left_on="ce_downloads_hash_sha256",
    right_on="stashapp_primary_file_sha256",
    coalesce=False,
)
joined_galleries

ce_downloads_site_uuid,ce_downloads_site_name,ce_downloads_sub_site_name,ce_downloads_release_uuid,ce_downloads_release_date,ce_downloads_release_short_name,ce_downloads_release_name,ce_downloads_release_url,ce_downloads_release_description,ce_downloads_release_created,ce_downloads_release_last_updated,ce_downloads_release_available_files,ce_downloads_release_json_document,ce_downloads_uuid,ce_downloads_downloaded_at,ce_downloads_file_type,ce_downloads_content_type,ce_downloads_variant,ce_downloads_available_file,ce_downloads_original_filename,ce_downloads_saved_filename,ce_downloads_file_metadata,ce_downloads_performers,ce_downloads_tags,ce_downloads_hash_oshash,ce_downloads_hash_phash,ce_downloads_hash_sha256,stashapp_id,stashapp_title,stashapp_details,stashapp_date,stashapp_code,stashapp_urls,stashapp_photographer,stashapp_created_at,stashapp_updated_at,stashapp_performers,stashapp_studio,stashapp_files,stashapp_primary_file_path,stashapp_primary_file_basename,stashapp_primary_file_md5,stashapp_primary_file_sha256,stashapp_primary_file_xxhash,stashapp_tags,stashapp_organized,stashapp_stash_ids,stashapp_ce_id
str,str,str,str,date,str,str,str,str,datetime[μs],datetime[μs],str,str,str,datetime[μs],str,str,str,str,str,str,str,list[struct[4]],list[struct[4]],str,str,str,i64,str,str,date,str,list[str],struct[3],datetime[μs],datetime[μs],list[struct[7]],struct[4],list[struct[5]],str,str,str,str,str,list[struct[2]],bool,list[struct[3]],str
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""019286eb-2af7-735a-bf8f-fa8176…",2005-05-01,"""7545""","""Kinga""","""https://www.femjoy.com/post/75…","""""",2024-10-13 17:26:52.002778,2024-10-13 17:26:52.002778,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""7545"", ""title…","""019286eb-2f85-74d2-837b-78b02c…",2024-10-13 20:26:53.061396,"""zip""","""gallery""","""Poster Size Edition 5500px""","""{""file_type"": ""zip"", ""content_…","""zip_l.zip""","""Femjoy - 2005-05-01 - Kinga - …","""{""$type"": ""GalleryZipFileMetad…","[{""acf38e04-cc75-4cf4-95f3-8253344a9a03"",""kinga"",""Kinga"",""/models/kinga""}]",[],,,"""3635cdc81e33e9d41b7b589f807bd4…",5778,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 06:55:35,2024-12-02 06:55:36,[],,"[{614966,""W:\Culture\Photos\Femjoy - 2005-05-01 - Kinga - Poster Size Edition 5500px - 019286eb-2af7-735a-bf8f-fa8176895e53.zip"",""Femjoy - 2005-05-01 - Kinga - Poster Size Edition 5500px - 019286eb-2af7-735a-bf8f-fa8176895e53.zip"",11420982,[{""md5"",""d2fb359d5bae3414f01b15b946361018""}, {""sha256"",""3635cdc81e33e9d41b7b589f807bd442d2c4fe8d4dd4f22d2b8dbedc282f60c3""}, {""xxhash"",""783b2ba3e6b1d0a6""}]}]","""W:\Culture\Photos\Femjoy - 200…","""Femjoy - 2005-05-01 - Kinga - …","""d2fb359d5bae3414f01b15b9463610…","""3635cdc81e33e9d41b7b589f807bd4…","""783b2ba3e6b1d0a6""",[],false,,
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""019286eb-35d3-75c8-a15e-6c4025…",2005-06-29,"""7585""","""Cheyenne""","""https://www.femjoy.com/post/75…","""""",2024-10-13 17:26:54.790255,2024-10-13 17:26:54.790255,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""7585"", ""title…","""019286eb-3b59-7516-af2c-d32645…",2024-10-13 20:26:56.089697,"""zip""","""gallery""","""Poster Size Edition 5500px""","""{""file_type"": ""zip"", ""content_…","""zip_l.zip""","""Femjoy - 2005-06-29 - Cheyenne…","""{""$type"": ""GalleryZipFileMetad…","[{""acf38e04-cc75-4cf4-95f3-8253344a9a03"",""kinga"",""Kinga"",""/models/kinga""}]",[],,,"""f97f8fac081a81f314f30f1bba390e…",5784,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 06:55:37,2024-12-02 06:55:37,[],,"[{615102,""W:\Culture\Photos\Femjoy - 2005-06-29 - Cheyenne - Poster Size Edition 5500px - 019286eb-35d3-75c8-a15e-6c40256818e7.zip"",""Femjoy - 2005-06-29 - Cheyenne - Poster Size Edition 5500px - 019286eb-35d3-75c8-a15e-6c40256818e7.zip"",20910768,[{""md5"",""4e9d3b61d164073f55139ce782b20002""}, {""sha256"",""f97f8fac081a81f314f30f1bba390e5bdcf35ce5f05edf1641816732216bdced""}, {""xxhash"",""91af22baa404df8c""}]}]","""W:\Culture\Photos\Femjoy - 200…","""Femjoy - 2005-06-29 - Cheyenne…","""4e9d3b61d164073f55139ce782b200…","""f97f8fac081a81f314f30f1bba390e…","""91af22baa404df8c""",[],false,,
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""019286eb-42a6-70be-a72f-60e952…",2005-07-05,"""7583""","""Duette""","""https://www.femjoy.com/post/75…","""""",2024-10-13 17:26:58.072938,2024-10-13 17:26:58.072938,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""7583"", ""title…","""019286eb-4737-7494-898c-c1820b…",2024-10-13 20:26:59.127045,"""zip""","""gallery""","""Poster Size Edition 5500px""","""{""file_type"": ""zip"", ""content_…","""zip_l.zip""","""Femjoy - 2005-07-05 - Duette -…","""{""$type"": ""GalleryZipFileMetad…","[{""acf38e04-cc75-4cf4-95f3-8253344a9a03"",""kinga"",""Kinga"",""/models/kinga""}, {""c4d6f38d-1885-4b1d-8930-bd048823e1df"",""eva"",""Eva"",""/models/eva""}]",[],,,"""94ab5f423536a61e67af38ef1a8abe…",5786,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 06:55:39,2024-12-02 06:55:39,[],,"[{615211,""W:\Culture\Photos\Femjoy - 2005-07-05 - Duette - Poster Size Edition 5500px - 019286eb-42a6-70be-a72f-60e952da24fd.zip"",""Femjoy - 2005-07-05 - Duette - Poster Size Edition 5500px - 019286eb-42a6-70be-a72f-60e952da24fd.zip"",14795513,[{""md5"",""e369727b709cf12465d9e9a1d1935c65""}, {""sha256"",""94ab5f423536a61e67af38ef1a8abed0bef91879dc4404d212d42baf76d64fd1""}, {""xxhash"",""82105de80a1421a3""}]}]","""W:\Culture\Photos\Femjoy - 200…","""Femjoy - 2005-07-05 - Duette -…","""e369727b709cf12465d9e9a1d1935c…","""94ab5f423536a61e67af38ef1a8abe…","""82105de80a1421a3""",[],false,,
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""019286eb-4d89-745d-ae79-63f12f…",2005-07-11,"""7564""","""Adoration""","""https://www.femjoy.com/post/75…","""""",2024-10-13 17:27:00.878387,2024-10-13 17:27:00.878387,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""7564"", ""title…","""019286eb-5076-77b6-9d6b-822729…",2024-10-13 20:27:01.494714,"""zip""","""gallery""","""Poster Size Edition 5500px""","""{""file_type"": ""zip"", ""content_…","""zip_l.zip""","""Femjoy - 2005-07-11 - Adoratio…","""{""$type"": ""GalleryZipFileMetad…","[{""6c293b62-af42-44f9-8bd3-411578247453"",""corinna"",""Corinna"",""/models/corinna""}]",[],,,"""6c9cb43fce700d5e67beca2175e58b…",5787,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 06:55:39,2024-12-02 06:55:39,[],,"[{615229,""W:\Culture\Photos\Femjoy - 2005-07-11 - Adoration - Poster Size Edition 5500px - 019286eb-4d89-745d-ae79-63f12f01b2d6.zip"",""Femjoy - 2005-07-11 - Adoration - Poster Size Edition 5500px - 019286eb-4d89-745d-ae79-63f12f01b2d6.zip"",7367447,[{""md5"",""e1b8f310d00a6bab232abe75d6adc7a3""}, {""sha256"",""6c9cb43fce700d5e67beca2175e58b3886e594b454106830b965127ccae1f14d""}, {""xxhash"",""486b780e380599db""}]}]","""W:\Culture\Photos\Femjoy - 200…","""Femjoy - 2005-07-11 - Adoratio…","""e1b8f310d00a6bab232abe75d6adc7…","""6c9cb43fce700d5e67beca2175e58b…","""486b780e380599db""",[],false,,
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""019286eb-5770-7752-9359-e459fb…",2005-07-25,"""7584""","""Jewel""","""https://www.femjoy.com/post/75…","""""",2024-10-13 17:27:03.406573,2024-10-13 17:27:03.406573,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""7584"", ""title…","""019286ee-030f-763a-a26f-9b707e…",2024-10-13 20:29:58.287113,"""zip""","""gallery""","""Poster Size Edition 5500px""","""{""file_type"": ""zip"", ""content_…","""zip_l.zip""","""Femjoy - 2005-07-25 - Jewel - …","""{""$type"": ""GalleryZipFileMetad…","[{""acf38e04-cc75-4cf4-95f3-8253344a9a03"",""kinga"",""Kinga"",""/models/kinga""}]",[],,,"""611561549b82e94a85719a603e7c4c…",5789,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 06:55:40,2024-12-02 06:55:40,[],,"[{615318,""W:\Culture\Photos\Femjoy - 2005-07-25 - Jewel - Poster Size Edition 5500px - 019286eb-5770-7752-9359-e459fb886707.zip"",""Femjoy - 2005-07-25 - Jewel - Poster Size Edition 5500px - 019286eb-5770-7752-9359-e459fb886707.zip"",17155945,[{""md5"",""bef38367e7b1a5785cb36512634c391f""}, {""sha256"",""611561549b82e94a85719a603e7c4cefbf73c0f68ec67435c11c17ade02c1d77""}, {""xxhash"",""75a431944940e79f""}]}]","""W:\Culture\Photos\Femjoy - 200…","""Femjoy - 2005-07-25 - Jewel - …","""bef38367e7b1a5785cb36512634c39…","""611561549b82e94a85719a603e7c4c…","""75a431944940e79f""",[],false,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""01928931-74d3-778b-8735-c61224…",2011-08-28,"""3820""","""Go Ahead""","""https://www.femjoy.com/post/38…","""""",2024-10-14 04:02:52.879702,2024-10-14 04:02:52.879702,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""3820"", ""title…","""01928931-81fb-731c-96d4-d0a0ff…",2024-10-14 07:02:56.123637,"""zip""","""gallery""","""Poster Size Edition 5500px""","""{""file_type"": ""zip"", ""content_…","""zip_l.zip""","""Femjoy - 2011-08-28 - Go Ahead…","""{""$type"": ""GalleryZipFileMetad…","[{""c040ec45-579f-4b9f-8b5b-6772ab9eb28a"",""miela"",""Miela"",""/models/miela""}]",[],,,"""89f95171f1000648d227fe814f95ea…",6176,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 07:00:18,2024-12-02 07:00:18,[],,"[{635930,""W:\Culture\Photos\Femjoy - 2011-08-28 - Go Ahead - Poster Size Edition 5500px - 01928931-74d3-778b-8735-c6122483fdb5.zip"",""Femjoy - 2011-08-28 - Go Ahead - Poster Size Edition 5500px - 01928931-74d3-778b-8735-c6122483fdb5.zip"",29767311,[{""md5"",""09db4d446869664c4052cb99599fb64f""}, {""sha256"",""89f95171f1000648d227fe814f95ea74dbba32283a7ffc5a506514779c5d6ef7""}, {""xxhash"",""b0171089c3395ad1""}]}]","""W:\Culture\Photos\Femjoy - 201…","""Femjoy - 2011-08-28 - Go Ahead…","""09db4d446869664c4052cb99599fb6…","""89f95171f1000648d227fe814f95ea…","""b0171089c3395ad1""",[],false,,
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""01928931-87a0-75cc-a1a6-a1ddc0…",2011-09-01,"""3874""","""Rain for the Roses""","""https://www.femjoy.com/post/38…","""""",2024-10-14 04:02:57.680243,2024-10-14 04:02:57.680243,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""3874"", ""title…","""01928931-9055-74d2-b772-ea69b6…",2024-10-14 07:02:59.797257,"""zip""","""gallery""","""Poster Size Edition 5500px""","""{""file_type"": ""zip"", ""content_…","""zip_l.zip""","""Femjoy - 2011-09-01 - Rain for…","""{""$type"": ""GalleryZipFileMetad…","[{""acf38e04-cc75-4cf4-95f3-8253344a9a03"",""kinga"",""Kinga"",""/models/kinga""}]",[],,,"""6dfa0bb8feeb711fc01311c96aa495…",6177,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 07:00:18,2024-12-02 07:00:18,[],,"[{635978,""W:\Culture\Photos\Femjoy - 2011-09-01 - Rain for the Roses - Poster Size Edition 5500px - 01928931-87a0-75cc-a1a6-a1ddc07e2caa.zip"",""Femjoy - 2011-09-01 - Rain for the Roses - Poster Size Edition 5500px - 01928931-87a0-75cc-a1a6-a1ddc07e2caa.zip"",34432139,[{""md5"",""9e4ba78e84f5da9cb03a2a7db11a2cf0""}, {""sha256"",""6dfa0bb8feeb711fc01311c96aa495d607c84fa9530fd5aa17a8abfa3f2ddf1a""}, {""xxhash"",""9e7bfc91b6bc177e""}]}]","""W:\Culture\Photos\Femjoy - 201…","""Femjoy - 2011-09-01 - Rain for…","""9e4ba78e84f5da9cb03a2a7db11a2c…","""6dfa0bb8feeb711fc01311c96aa495…","""9e7bfc91b6bc177e""",[],false,,
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""01928931-9734-74da-a188-6fd9bd…",2011-09-03,"""3718""","""Because I Miss You""","""https://www.femjoy.com/post/37…","""""",2024-10-14 04:03:01.707957,2024-10-14 04:03:01.707957,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""3718"", ""title…","""01928931-a9a4-762d-bf72-328c39…",2024-10-14 07:03:06.276817,"""zip""","""gallery""","""Poster Size Edition 5500px""","""{""file_type"": ""zip"", ""content_…","""zip_l.zip""","""Femjoy - 2011-09-03 - Because …","""{""$type"": ""GalleryZipFileMetad…","[{""79e16c48-b3df-4fe4-9622-3722fe646618"",""danica"",""Danica"",""/models/danica""}]",[],,,"""f8eb8c06b6f6fba350dcd98780efea…",6178,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 07:00:19,2024-12-02 07:00:19,[],,"[{636007,""W:\Culture\Photos\Femjoy - 2011-09-03 - Because I Miss You - Poster Size Edition 5500px - 01928931-9734-74da-a188-6fd9bdbdfc4c.zip"",""Femjoy - 2011-09-03 - Because I Miss You - Poster Size Edition 5500px - 01928931-9734-74da-a188-6fd9bdbdfc4c.zip"",94396298,[{""md5"",""c44b3753fa09bc4646e99a22b77a9eeb""}, {""sha256"",""f8eb8c06b6f6fba350dcd98780efea356c6e2796e5986f86137e053f1810755a""}, {""xxhash"",""561bcec43600dd0c""}]}]","""W:\Culture\Photos\Femjoy - 201…","""Femjoy - 2011-09-03 - Because …","""c44b3753fa09bc4646e99a22b77a9e…","""f8eb8c06b6f6fba350dcd98780efea…","""561bcec43600dd0c""",[],false,,
"""019283a1-e6d7-767c-98dd-0fd1b1…","""Femjoy""",,"""01928931-af7e-740b-9240-15c642…",2011-09-04,"""3872""","""If You Like Me""","""https://www.femjoy.com/post/38…","""""",2024-10-14 04:03:07.887213,2024-10-14 04:03:07.887213,"""[{""file_type"": ""zip"", ""content…","""{""external_id"": ""3872"", ""title…","""01928931-b53d-7298-b118-417f6b…",2024-10-14 07:03:09.245009,"""zip""","""gallery""","""Poster Size Edition 5500px""","""{""file_type"": ""zip"", ""content_…","""zip_l.zip""","""Femjoy - 2011-09-04 - If You L…","""{""$type"": ""GalleryZipFileMetad…","[{""6c293b62-af42-44f9-8bd3-411578247453"",""corinna"",""Corinna"",""/models/corinna""}]",[],,,"""d9f5ea9774fcda44894a88a1db67e0…",6179,"""""","""""",,"""""",[],"{null,null,null}",2024-12-02 07:00:20,2024-12-02 07:00:20,[],,"[{636095,""W:\Culture\Photos\Femjoy - 2011-09-04 - If You Like Me - Poster Size Edition 5500px - 01928931-af7e-740b-9240-15c642c418de.zip"",""Femjoy - 2011-09-04 - If You Like Me - Poster Size Edition 5500px - 01928931-af7e-740b-9240-15c642c418de.zip"",18582367,[{""md5"",""c929e79e2bfabbb4b0eb9f3588e50e4a""}, {""sha256"",""d9f5ea9774fcda44894a88a1db67e02af4eb8be73060a52d40db314be2ab5405""}, {""xxhash"",""ad17769ae013eccd""}]}]","""W:\Culture\Photos\Femjoy - 201…","""Femjoy - 2011-09-04 - If You L…","""c929e79e2bfabbb4b0eb9f3588e50e…","""d9f5ea9774fcda44894a88a1db67e0…","""ad17769ae013eccd""",[],false,,


In [160]:
# Fetch every StashApp performer and expose the value of this studio's
# "CultureExtractor.<site short name>" custom field as its own column
# (null when the performer has no such field).
all_stashapp_performers = stash_client.get_performers().with_columns(
    ce_custom_field_value=(
        pl.col("stashapp_custom_fields")
        .list.eval(
            # Keep the value of the matching key, null for every other entry.
            pl.when(pl.element().struct.field("key") == f"CultureExtractor.{selected_studio['ce_sites_short_name']}")
            .then(pl.element().struct.field("value"))
            .otherwise(None)
        )
        .list.drop_nulls()
        .list.first()
    )
)
all_stashapp_performers

stashapp_id,stashapp_name,stashapp_alias_list,stashapp_urls,stashapp_gender,stashapp_stash_ids,stashapp_custom_fields,ce_custom_field_value
i64,str,list[str],list[str],enum,list[struct[3]],list[struct[2]],str
382,"""A.J.""",[],[],"""MALE""","[{""https://stashdb.org/graphql"",""d24e1448-7365-4285-b4fb-d5e1de1114ad"",1970-01-01 00:00:00}]",[],
85,"""Aali Kali""",[],"[""https://twitter.com/Aalikalixxx""]","""FEMALE""","[{""https://stashdb.org/graphql"",""74d5409f-7390-4373-9118-12b93d31534e"",1970-01-01 00:00:00}, {""https://theporndb.net/graphql"",""434e3732-a119-4f66-bb00-2abe66544bae"",1970-01-01 00:00:00}]",[],
3336,"""Aaliyah Hadid""",[],"[""https://aaliyahhadidxxx.com/"", ""https://es.wikipedia.org/wiki/Aaliyah_Hadid"", … ""https://www.youtube.com/c/aaliyahhadidxo""]","""FEMALE""","[{""https://stashdb.org/graphql"",""50459d16-787c-47c9-8ce9-a4cac9404324"",1970-01-01 00:00:00}]",[],
246,"""Aaliyah Love""",[],"[""https://twitter.com/AaliyahLove69""]","""FEMALE""","[{""https://stashdb.org/graphql"",""9ac606f4-a784-4849-a2fa-5c8b04831e7e"",1970-01-01 00:00:00}, {""https://theporndb.net/graphql"",""259f2f96-ac9b-4302-ba46-f5a51602e024"",1970-01-01 00:00:00}]",[],
3232,"""Aaron""",[],"[""https://www.iafd.com/person.rme/perfid=unknownmale344706-a/gender=m/aaron.htm""]","""MALE""","[{""https://stashdb.org/graphql"",""4cc33943-def0-478a-85af-a22bb3f03f40"",1970-01-01 00:00:00}]",[],
…,…,…,…,…,…,…,…
2769,"""Zoey Paige""",[],[],"""FEMALE""","[{""https://stashdb.org/graphql"",""bbb400f3-d028-40a4-aee4-53c536872c57"",1970-01-01 00:00:00}]",[],
677,"""Zor""",[],[],"""MALE""","[{""https://stashdb.org/graphql"",""eae0c0c9-5355-4c20-9c2d-c57c7acbb826"",1970-01-01 00:00:00}]",[],
1708,"""Zora Banks""",[],[],"""FEMALE""","[{""https://stashdb.org/graphql"",""746a6963-00fd-4198-aa76-c84ea076ef31"",1970-01-01 00:00:00}]",[],
249,"""Zuzana Zeleznovova""",[],[],"""FEMALE""","[{""https://stashdb.org/graphql"",""9f16356a-0be2-4792-aa0d-a0c1dbfeea72"",1970-01-01 00:00:00}, {""https://theporndb.net/graphql"",""85a68335-8d6b-4928-87c7-2c3888d003c7"",1970-01-01 00:00:00}]",[],


In [161]:
# One row per distinct (uuid, name) performer appearing in the matched
# galleries, sorted by name.
unique_gallery_performers = (
    joined_galleries
    .select("ce_downloads_performers")
    .explode("ce_downloads_performers")
    .select(
        performer_uuid=pl.col("ce_downloads_performers").struct.field("uuid"),
        performer_name=pl.col("ce_downloads_performers").struct.field("name"),
    )
    .unique()
    .sort("performer_name")
)
unique_gallery_performers

performer_uuid,performer_name
str,str
"""91a88019-aeb9-4c1c-9c23-ce2e2a…","""Abigail"""
"""7871a3af-4b3c-4939-8e69-139a49…","""Aelita"""
"""958a353d-fcfa-418b-876a-91c4d0…","""Alba O."""
"""f11025ec-7a1d-48ba-97ef-92a6f7…","""Alisha"""
"""549ca9b5-fea0-4764-b914-08ecca…","""Alsu T."""
…,…
"""f4573ec7-19be-4466-a70f-8bdb67…","""Tea"""
"""e270e0f5-8125-44ec-8994-b1c6fc…","""Tracy A."""
"""fe544d4b-6b97-4cf4-8024-6768de…","""Vanea H."""
"""02f30e27-1d2c-471d-971f-534c58…","""Vanessa O."""


In [166]:
# Right join: every gallery performer is kept, and the Stash columns stay null
# when no Stash performer carries the matching Culture Extractor UUID.
matching_performers = all_stashapp_performers.join(
    unique_gallery_performers,
    left_on="ce_custom_field_value",
    right_on="performer_uuid",
    how="right",
)

# Rows without a Stash id are the gallery performers that could not be matched.
df_unmatched_performers = (
    matching_performers
    .filter(pl.col("stashapp_id").is_null())
    .select("performer_uuid", "performer_name")
)

print(f"Found {matching_performers.filter(pl.col('stashapp_id').is_not_null()).height} matching performers")
if not df_unmatched_performers.is_empty():
    print(f"WARNING: {df_unmatched_performers.height} performers not found in Stash:")
    for row in df_unmatched_performers.iter_rows(named=True):
        print(f"  {row['performer_uuid']} {row['performer_name']}")

# Display only the performers that were matched in Stash.
matching_performers.filter(pl.col("stashapp_id").is_not_null())

Found 89 matching performers


stashapp_id,stashapp_name,stashapp_alias_list,stashapp_urls,stashapp_gender,stashapp_stash_ids,stashapp_custom_fields,performer_uuid,performer_name
i64,str,list[str],list[str],enum,list[struct[3]],list[struct[2]],str,str
4163,"""Mariya A""",[],"[""https://www.indexxx.com/m/mariya-a""]","""FEMALE""",[],"[{""CultureExtractor.femjoy"",""91a88019-aeb9-4c1c-9c23-ce2e2a2046b0""}]","""91a88019-aeb9-4c1c-9c23-ce2e2a…","""Abigail"""
1876,"""Mandy Dee""",[],[],"""FEMALE""","[{""https://stashdb.org/graphql"",""941f7e43-2685-4be5-8bbd-9b6fb1394aff"",1970-01-01 00:00:00}, {""https://theporndb.net/graphql"",""0064e78a-af5b-44fe-9d09-03250d7b5f17"",1970-01-01 00:00:00}]","[{""CultureExtractor.femjoy"",""7871a3af-4b3c-4939-8e69-139a49c1fe57""}]","""7871a3af-4b3c-4939-8e69-139a49…","""Aelita"""
4164,"""Alba A""",[],"[""https://www.europornstar.com/Alba2/"", ""https://www.indexxx.com/m/alba-o"", … ""https://www.thenude.com/_27907.htm""]","""FEMALE""","[{""https://stashdb.org/graphql"",""f67a4871-9c08-46f1-9833-a7d4a366a13a"",2025-01-12 13:40:29.695}]","[{""CultureExtractor.femjoy"",""958a353d-fcfa-418b-876a-91c4d06231f1""}]","""958a353d-fcfa-418b-876a-91c4d0…","""Alba O."""
188,"""Casey Nohrman""",[],"[""https://twitter.com/Moore_Eyla""]","""FEMALE""","[{""https://stashdb.org/graphql"",""8a10e8fb-2795-4dfb-8398-477690e46b29"",1970-01-01 00:00:00}, {""https://theporndb.net/graphql"",""69a87e77-ccae-4f28-8518-fa67eaf8eaf5"",1970-01-01 00:00:00}, {""https://theporndb.net/graphql"",""257e2263-6b64-4fa2-ae5e-74934b68acc5"",1970-01-01 00:00:00}]","[{""CultureExtractor.femjoy"",""f11025ec-7a1d-48ba-97ef-92a6f7b39206""}, {""CultureExtractor.girlsonlyporn"",""018ea295-a559-7517-af12-d58f777d0ba9""}, … {""CultureExtractor.tushy"",""018c06df-fd93-7180-836a-bb4e699ae0be""}]","""f11025ec-7a1d-48ba-97ef-92a6f7…","""Alisha"""
4165,"""Alsu T""",[],"[""https://www.indexxx.com/m/alsu-t"", ""https://www.thenude.com/_27808.htm""]","""FEMALE""","[{""https://stashdb.org/graphql"",""354b0a03-4e17-434e-80aa-15c784ecd2b1"",2025-01-12 13:42:32.618}]","[{""CultureExtractor.femjoy"",""549ca9b5-fea0-4764-b914-08eccaf6a750""}]","""549ca9b5-fea0-4764-b914-08ecca…","""Alsu T."""
…,…,…,…,…,…,…,…,…
1889,"""Tea""",[],[],"""FEMALE""","[{""https://stashdb.org/graphql"",""5a7f0db7-8e74-42be-9cac-a93386fd5f9a"",1970-01-01 00:00:00}, {""https://theporndb.net/graphql"",""37ca789e-0722-45de-be07-8a0e136509f0"",1970-01-01 00:00:00}]","[{""CultureExtractor.femjoy"",""f4573ec7-19be-4466-a70f-8bdb6745ee92""}]","""f4573ec7-19be-4466-a70f-8bdb67…","""Tea"""
2787,"""Tracy Gold""",[],[],"""FEMALE""","[{""https://stashdb.org/graphql"",""293c8d3a-7d30-41f7-8294-8ea6697c26ae"",1970-01-01 00:00:00}, {""https://theporndb.net/graphql"",""a1e06ded-e612-4790-9998-f8df54d6f3a2"",1970-01-01 00:00:00}, {""https://theporndb.net/graphql"",""affb3fb4-93de-4b1b-a0ff-52f7f99c1519"",1970-01-01 00:00:00}]","[{""CultureExtractor.femjoy"",""e270e0f5-8125-44ec-8994-b1c6fc230a3e""}]","""e270e0f5-8125-44ec-8994-b1c6fc…","""Tracy A."""
403,"""Viola Bailey""",[],"[""https://twitter.com/ViaArthur""]","""FEMALE""","[{""https://stashdb.org/graphql"",""4191bfc6-b978-494c-8fa5-9ae6be87fde1"",1970-01-01 00:00:00}, {""https://theporndb.net/graphql"",""b67dfcdf-0443-4e1a-b404-68184a4413a7"",1970-01-01 00:00:00}]","[{""CultureExtractor.femjoy"",""fe544d4b-6b97-4cf4-8024-6768de9f8733""}]","""fe544d4b-6b97-4cf4-8024-6768de…","""Vanea H."""
4160,"""Vanessa O""",[],"[""https://www.europornstar.com/Vanessa-Hottie/"", ""https://www.iafd.com/person.rme/id=9f90330b-7397-4aa9-b032-afc3b23ffcce"", … ""https://www.thenude.com/_27776.htm""]","""FEMALE""","[{""https://stashdb.org/graphql"",""4fe774ec-c8c4-4055-8c8b-9a1aceaddbd1"",2025-01-12 06:04:01.260}]","[{""CultureExtractor.femjoy"",""02f30e27-1d2c-471d-971f-534c5843062c""}]","""02f30e27-1d2c-471d-971f-534c58…","""Vanessa O."""


In [159]:
# One-off manual fix-up: set the "CultureExtractor.<site short name>" custom field
# of Stash performer 687 to a hard-coded UUID (presumably that performer's
# Culture Extractor UUID for the selected site - verify before re-running).
stash_client.update_performer_custom_fields(
    687,
    { "CultureExtractor." + selected_studio["ce_sites_short_name"]: "6e76ee6c-394a-43ae-a604-d65e62d6302e" }
)


In [169]:
# Mapping from Culture Extractor performer UUID to Stash performer id/name,
# restricted to performers that carry the custom field of the selected site.
ce_performer_mapping = (
    stash_client.get_performers()
    .with_columns(
        # Pull the value of the "CultureExtractor.<site>" entry out of the
        # list of {key, value} custom-field structs (null when absent).
        pl.col("stashapp_custom_fields")
        .list.eval(
            pl.element().struct.field("value").filter(
                pl.element().struct.field("key") == "CultureExtractor." + selected_studio["ce_sites_short_name"]
            )
        )
        .list.first()
        .alias("ce_custom_field_value")
    )
    .filter(pl.col("ce_custom_field_value").is_not_null())
    .select(
        pl.col("ce_custom_field_value").alias("ce_performer_uuid"),
        "stashapp_id",
        "stashapp_name",
    )
    .sort("stashapp_name")
)
ce_performer_mapping

ce_performer_uuid,stashapp_id,stashapp_name
str,i64,str
"""958a353d-fcfa-418b-876a-91c4d0…",4164,"""Alba A"""
"""73ef6bf3-83ef-4cec-b547-43bcca…",492,"""Alecia Fox"""
"""549ca9b5-fea0-4764-b914-08ecca…",4165,"""Alsu T"""
"""3b12eecf-c692-477e-91e4-300d9a…",81,"""Amaris"""
"""0dd7ec3f-917f-4a23-9b33-49a4e2…",4195,"""Amelie B"""
…,…,…
"""ac2d9eaa-4f0e-4eeb-a249-80b7a7…",4179,"""Vanessa Albright"""
"""89a62349-19d7-43c0-956e-d53d93…",1339,"""Vanessa Angel"""
"""02f30e27-1d2c-471d-971f-534c58…",4160,"""Vanessa O"""
"""fe544d4b-6b97-4cf4-8024-6768de…",403,"""Viola Bailey"""


In [174]:
def create_update_dataframe(joined_galleries, downloads, all_stashapp_performers, all_tags, stashapp_studio_id, performer_mapping=None):
    """Build one row per gallery with the metadata that should be written to Stash.

    Args:
        joined_galleries: polars DataFrame providing the ``stashapp_id``,
            ``stashapp_primary_file_basename``, ``ce_downloads_release_*`` and
            ``ce_downloads_performers`` columns selected below.
        downloads: Not used by this function; kept so existing calls keep working.
        all_stashapp_performers: Not used by this function; kept for compatibility.
        all_tags: Not used by this function; kept for compatibility.
        stashapp_studio_id: Studio id written to the ``studio_id`` column of every row.
        performer_mapping: Optional polars DataFrame with ``ce_performer_uuid``,
            ``stashapp_id`` and ``stashapp_name`` columns. When omitted, the
            notebook-level ``ce_performer_mapping`` is used, which is what this
            function read implicitly before the parameter existed.

    Returns:
        A polars DataFrame with the renamed release columns plus
        ``ce_performer_uuids``, ``ce_performer_stashapp_ids``,
        ``ce_performer_stashapp_names`` and ``performer_ids``.
    """
    if performer_mapping is None:
        # Backward-compatible default: fall back to the notebook global.
        performer_mapping = ce_performer_mapping

    def mapped_performer_values(mapping_column, return_dtype):
        # For each gallery, collect the distinct `mapping_column` values of the
        # mapping rows whose Culture Extractor UUID is in the gallery's UUID list.
        # Rows with a null UUID list get an empty list instead.
        # NOTE: ids and names are de-duplicated independently, so the two
        # resulting lists are not guaranteed to be index-aligned.
        return (
            pl.when(pl.col("ce_performer_uuids").is_not_null())
            .then(
                pl.col("ce_performer_uuids").map_elements(
                    lambda uuids: performer_mapping.filter(
                        pl.col("ce_performer_uuid").is_in(uuids)
                    ).get_column(mapping_column).unique().to_list(),
                    return_dtype=return_dtype,
                )
            )
            .otherwise(pl.Series([[]]))
        )

    # Get all gallery data ready for updates
    updates_df = joined_galleries.select([
        pl.col("ce_downloads_release_uuid").alias("ce_release_uuid"),
        pl.col("stashapp_id").alias("gallery_id"),
        pl.col("stashapp_primary_file_basename").alias("primary_file_basename"),
        pl.col("ce_downloads_release_date").alias("date"),
        pl.col("ce_downloads_release_name").alias("title"),
        pl.col("ce_downloads_release_short_name").alias("code"),
        pl.col("ce_downloads_release_description").alias("details"),
        pl.lit(stashapp_studio_id).alias("studio_id"),
        pl.col("ce_downloads_release_url").alias("url"),
        # Kept under its original name as well: generate_update_inputs reads it
        # to build the culture.extractor gallery URL.
        pl.col("ce_downloads_release_uuid"),
        pl.col("ce_downloads_performers"),
    ])

    # Distinct Culture Extractor performer UUIDs per gallery
    updates_df = updates_df.with_columns([
        pl.col("ce_downloads_performers").list.eval(
            pl.element().struct.field("uuid")
        ).list.unique().alias("ce_performer_uuids"),
    ])

    # Translate the UUIDs into Stash performer ids and names
    updates_df = updates_df.with_columns([
        mapped_performer_values("stashapp_id", pl.List(pl.Int64)).alias("ce_performer_stashapp_ids"),
        mapped_performer_values("stashapp_name", pl.List(pl.Utf8)).alias("ce_performer_stashapp_names"),
    ])

    # Final performer id list (currently fed by the Culture Extractor mapping only)
    updates_df = updates_df.with_columns([
        pl.concat_list([
            pl.col("ce_performer_stashapp_ids"),
        ]).list.unique().alias("performer_ids")
    ])

    return updates_df

def generate_update_inputs(updates_df, stash_raw_client):
    """Combine the planned gallery updates with the current state stored in Stash.

    For every row of ``updates_df`` the gallery is re-fetched through
    ``stash_raw_client.find_gallery`` so that the result holds both the existing
    values (``existing_gallery_*``) and the values to be written.

    Args:
        updates_df: Object exposing ``iter_rows(named=True)`` that yields rows with
            the keys ``ce_release_uuid``, ``gallery_id``, ``primary_file_basename``,
            ``date``, ``title``, ``code``, ``details``, ``studio_id``, ``url``,
            ``ce_downloads_release_uuid`` and ``performer_ids``.
        stash_raw_client: Client exposing ``find_gallery(gallery_id)``.

    Returns:
        ``pl.DataFrame`` with one record per input row. ``performer_ids`` and
        ``gallery_urls`` contain the existing values followed by the new ones,
        de-duplicated with their order preserved.
    """
    updates = []

    for row in updates_df.iter_rows(named=True):
        # Get current gallery data. A gallery that cannot be found is treated as
        # an empty one so every lookup below is safe.
        refreshed_gallery = stash_raw_client.find_gallery(row["gallery_id"]) or {}

        existing_performers = refreshed_gallery.get("performers") or []
        existing_urls = refreshed_gallery.get("urls") or []
        existing_studio = refreshed_gallery.get("studio") or {}

        existing_performer_ids = [int(performer["id"]) for performer in existing_performers]
        new_performer_ids = row["performer_ids"] if row["performer_ids"] is not None else []

        # Existing URLs first, then the release URL and the Culture Extractor
        # link. Empty/None URLs are dropped so they never reach the update.
        candidate_urls = existing_urls + [
            row["url"],
            f"https://culture.extractor/galleries/{row['ce_downloads_release_uuid']}"
        ]

        update = {
            "ce_release_uuid": row["ce_release_uuid"],
            "gallery_id": row["gallery_id"],
            "primary_file_basename": row["primary_file_basename"],
            "existing_gallery_date": refreshed_gallery.get("date", None),
            "date": row["date"].strftime("%Y-%m-%d") if row["date"] else None,
            "existing_gallery_title": refreshed_gallery.get("title", None),
            "title": row["title"],
            "existing_gallery_code": refreshed_gallery.get("code", None),
            "code": row["code"],
            "existing_gallery_details": refreshed_gallery.get("details", None),
            "details": row["details"],
            "existing_gallery_studio_id": existing_studio.get("id"),
            "studio_id": row["studio_id"],
            "existing_gallery_performers": existing_performers,
            # dict.fromkeys de-duplicates while keeping a deterministic order
            "performer_ids": list(dict.fromkeys(existing_performer_ids + new_performer_ids)),
            "existing_gallery_urls": existing_urls,
            "gallery_urls": list(dict.fromkeys(url for url in candidate_urls if url)),
        }
        updates.append(update)

    return pl.DataFrame(updates)

# Usage: build the planned updates, pair them with the current Stash state,
# and order them by release date.
updates_df = create_update_dataframe(
    joined_galleries, downloads, all_stashapp_performers, all_tags,
    selected_studio["stash_studios_id"],
)

update_inputs_df = generate_update_inputs(updates_df, stash_raw_client).sort(by=["date"])

# Review updates before applying
print("Updates to be applied:")
print(update_inputs_df)

Updates to be applied:
shape: (982, 17)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ ce_releas ┆ gallery_i ┆ primary_f ┆ existing_ ┆ … ┆ existing_ ┆ performer ┆ existing_ ┆ gallery_ │
│ e_uuid    ┆ d         ┆ ile_basen ┆ gallery_d ┆   ┆ gallery_p ┆ _ids      ┆ gallery_u ┆ urls     │
│ ---       ┆ ---       ┆ ame       ┆ ate       ┆   ┆ erformers ┆ ---       ┆ rls       ┆ ---      │
│ str       ┆ i64       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ list[i64] ┆ ---       ┆ list[str │
│           ┆           ┆ str       ┆ str       ┆   ┆ list[stru ┆           ┆ list[str] ┆ ]        │
│           ┆           ┆           ┆           ┆   ┆ ct[1]]    ┆           ┆           ┆          │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ 019286eb- ┆ 5778      ┆ Femjoy -  ┆ 2005-05-0 ┆ … ┆ [{"4161"} ┆ [4161]    ┆ ["https:/ ┆ ["https: │
│ 2af7-735a ┆           ┆ 2005-05-0 ┆ 1         ┆  

In [176]:
# Write every reviewed update to Stash. A failing gallery is reported and the
# loop carries on with the next one.
for update in update_inputs_df.iter_rows(named=True):
    gallery_input = {"id": update["gallery_id"]}
    # These fields are copied over under the same name.
    gallery_input.update(
        (field, update[field])
        for field in ("date", "title", "code", "details", "studio_id", "performer_ids")
    )
    gallery_input["urls"] = update["gallery_urls"]

    try:
        stash_raw_client.update_gallery(gallery_input)
    except Exception as error:
        print(f"Error updating gallery for {update['gallery_id']}: {error}")

In [65]:
# NOTE(review): despite its name, `first_gallery` is bound to the whole
# `joined_galleries` collection; the following cells pick the first entry via
# `first_gallery[0]` and index it like a dict. Other cells use
# `joined_galleries` as a polars DataFrame - confirm which form is expected here.
first_gallery = joined_galleries
first_gallery


[{'ce_downloads_site_uuid': '019283a1-e6d7-767c-98dd-0fd1b1dd83ac',
  'ce_downloads_site_name': 'Femjoy',
  'ce_downloads_sub_site_name': None,
  'ce_downloads_release_uuid': '019286eb-4d89-745d-ae79-63f12f01b2d6',
  'ce_downloads_release_date': datetime.date(2005, 7, 11),
  'ce_downloads_release_short_name': '7564',
  'ce_downloads_release_name': 'Adoration',
  'ce_downloads_release_url': 'https://www.femjoy.com/post/7564',
  'ce_downloads_release_description': '',
  'ce_downloads_release_created': datetime.datetime(2024, 10, 13, 17, 27, 0, 878387),
  'ce_downloads_release_last_updated': datetime.datetime(2024, 10, 13, 17, 27, 0, 878387),
  'ce_downloads_release_available_files': '[{"file_type": "zip", "content_type": "gallery", "variant": "Quickview Edition 800px", "url": "https://c75600be0a.mjedge.net/hw/assets/studios/stefansoell/galleries/31102016180512/zip_s.zip?cd=attachment&filename=CORINNA_StefanSoell_7564_small.zip&expire=1728847619&token=066566a4c6e984ed264173d11487e807", "r

In [67]:
# "stashapp_id" value of the first gallery entry
first_gallery[0]["stashapp_id"]

5787

In [68]:
# "uuid" of every performer listed under "ce_downloads_performers" for the first gallery entry
[performer["uuid"] for performer in first_gallery[0]["ce_downloads_performers"]]


['6c293b62-af42-44f9-8bd3-411578247453']

In [69]:
import json
# Parse the release's JSON document and show its director name
# (raises KeyError when the document has no "director" entry).
json.loads(first_gallery[0]["ce_downloads_release_json_document"])["director"]["name"]


'Stefan Soell'

In [70]:
# TODO: Remove this singular process

# Performers listed for the first gallery entry
gallery_performers = list(first_gallery[0]["ce_downloads_performers"])

# Stash performers whose Culture Extractor UUID belongs to this gallery
matching_performers = all_stashapp_performers.filter(
    pl.col("ce_custom_field_value").is_in([entry["uuid"] for entry in gallery_performers])
)

# Gallery performers whose UUID did not show up among the matches
matched_uuids = matching_performers.get_column("ce_custom_field_value").to_list()
unmatched_performers = [entry for entry in gallery_performers if entry["uuid"] not in matched_uuids]

print(f"Found {matching_performers.height} matching performers")
if unmatched_performers:
    print(f"WARNING: {len(unmatched_performers)} performers not found in Stash:")
    for performer in unmatched_performers:
        print(f"  {performer['name']}")

matching_performers


Found 1 matching performers


stashapp_id,stashapp_name,stashapp_alias_list,stashapp_urls,stashapp_gender,stashapp_stash_ids,stashapp_custom_fields,ce_custom_field_value
i64,str,list[str],list[str],enum,list[struct[3]],list[struct[2]],str
4158,"""Corinna""",[],"[""https://www.babepedia.com/babe/Corinna_(Femjoy)"", ""https://www.indexxx.com/m/corinna-2"", ""https://www.thenude.com/Corinna_774.htm""]","""FEMALE""","[{""https://stashdb.org/graphql"",""f0706d92-05fa-4a61-91bc-06c7a3ffc54b"",2025-01-12 05:53:36.221}]","[{""CultureExtractor.femjoy"",""6c293b62-af42-44f9-8bd3-411578247453""}]","""6c293b62-af42-44f9-8bd3-411578…"


In [71]:
# Show the gallery performers that had no matching Stash performer
unmatched_performers


[]

In [72]:
# Write the Culture Extractor metadata of the first gallery entry to its Stash gallery.
refresh_gallery = stash_raw_client.find_gallery(first_gallery[0]["stashapp_id"])

# Keep the URLs already stored in Stash and append the release URL only when it
# is missing, so re-running this cell does not accumulate duplicate URLs.
# The `or` guards cover a gallery that was not found or has no URL list.
existing_gallery_urls = (refresh_gallery or {}).get("urls") or []
release_url = first_gallery[0]["ce_downloads_release_url"]
if release_url and release_url not in existing_gallery_urls:
    gallery_urls = existing_gallery_urls + [release_url]
else:
    gallery_urls = list(existing_gallery_urls)

stash_raw_client.update_gallery({
    "id": first_gallery[0]["stashapp_id"],
    "title": first_gallery[0]["ce_downloads_release_name"],
    "code": first_gallery[0]["ce_downloads_release_short_name"],
    "date": first_gallery[0]["ce_downloads_release_date"].isoformat(),
    "details": first_gallery[0]["ce_downloads_release_description"],
    # Raises KeyError when the release JSON document has no "director" entry
    "photographer": json.loads(first_gallery[0]["ce_downloads_release_json_document"])["director"]["name"],
    "performer_ids": matching_performers.get_column("stashapp_id").to_list(),
    "studio_id": selected_studio["stash_studios_id"],
    "urls": gallery_urls,
})

'5787'

In [16]:
# Ad-hoc check: rows of all_stashapp_performers whose ce_custom_field_value equals this hard-coded UUID
all_stashapp_performers.filter(pl.col("ce_custom_field_value").eq("acf38e04-cc75-4cf4-95f3-8253344a9a03"))

stashapp_id,stashapp_name,stashapp_alias_list,stashapp_urls,stashapp_gender,stashapp_stash_ids,stashapp_custom_fields,ce_custom_field_value
i64,str,list[str],list[str],enum,list[struct[3]],list[struct[2]],str


In [None]:
# Field selection passed as `fragment` to find_galleries: gallery fields plus
# nested performers, studio (with parent studio), files (with fingerprints),
# tags and scenes.
fragment = """
        id
        title
        details
        date
        code
        urls
        photographer
        created_at
        updated_at
        organized
        performers {
            id
            name
            disambiguation
            alias_list
            gender
            stash_ids {
                endpoint
                stash_id
                updated_at
            }
            custom_fields
        }
        studio {
            id
            name
            url
            parent_studio {
                id
                name
                url
            }
        }
        files {
        id
        path
        basename
        size
        fingerprints {
            type
            value
        }
        }
        tags {
        id
        name
        }
        scenes {
        id
        title
        }
        image_count
        """

# Query galleries with an empty filter dict and show the first result
# (raises IndexError when the query returns nothing).
galleries = stash_raw_client.find_galleries(f={}, fragment=fragment)
galleries[0]