In [29]:
import polars as pl
import dotenv
import os
from libraries.client_stashapp import get_stashapp_client
from libraries.StashDbClient import StashDbClient

# Format a StashDB ID for use as an aliasin Stash
stashdb_id_alias_prefix = "StashDB ID: "
def format_stashdb_id(id):
    return f"{stashdb_id_alias_prefix}{id}"

def contains_cjk(text):
    """Check if text contains CJK (Chinese, Japanese, Korean) characters."""
    # Unicode ranges for CJK characters
    cjk_ranges = [
        (0x4E00, 0x9FFF),   # CJK Unified Ideographs
        (0x3040, 0x309F),   # Hiragana
        (0x30A0, 0x30FF),   # Katakana
        (0x3400, 0x4DBF),   # CJK Unified Ideographs Extension A
        (0xF900, 0xFAFF),   # CJK Compatibility Ideographs
        (0xAC00, 0xD7AF),   # Korean Hangul Syllables
    ]
    
    return any(any(ord(char) >= start and ord(char) <= end 
               for start, end in cjk_ranges) 
               for char in text)


dotenv.load_dotenv()

stash = get_stashapp_client()

stashbox_client = StashDbClient(
    os.getenv("STASHDB_ENDPOINT"),
    os.getenv("STASHDB_API_KEY"),
)

dUsing stash (v0.27.2-37-g0621d871) endpoint at http://localhost:6969/graphql
dPersisting Connection to Stash with ApiKey...


# Merging tags


In [None]:
to_be_merged_tag = stash.find_tag('Sideways')
target_tag = stash.find_tag('Side Fuck')

print(to_be_merged_tag)
print("=>")
print(target_tag)

In [None]:
# Scenes
scenes = stash.find_scenes({ "tags": { "value": [to_be_merged_tag['id']], "modifier": "INCLUDES" }}, fragment="id title tags { id name }")
print(len(scenes))

In [None]:
# Update scenes
for scene in scenes:
    scene_id = scene['id']
    current_scene_tag_ids = [tag['id'] for tag in scene['tags']]
    update_scene_tag_ids = [tag_id for tag_id in current_scene_tag_ids if tag_id != to_be_merged_tag['id']] + [target_tag['id']]
    stash.update_scene({ "id": scene_id, "tag_ids": update_scene_tag_ids })
    print(f"Updated scene {scene_id} with tag {target_tag['name']}")

In [None]:
# Galleries
galleries = stash.find_galleries({ "tags": { "value": [to_be_merged_tag['id']], "modifier": "INCLUDES" }}, fragment="id title tags { id name }")
print(len(galleries))

In [None]:
# Update galleries
for gallery in galleries:
    gallery_id = gallery['id']
    current_gallery_tag_ids = [tag['id'] for tag in gallery['tags']]
    update_gallery_tag_ids = [tag_id for tag_id in current_gallery_tag_ids if tag_id != to_be_merged_tag['id']] + [target_tag['id']]
    stash.update_gallery({ "id": gallery_id, "tag_ids": update_gallery_tag_ids })
    print(f"Updated gallery {gallery_id} with tag {target_tag['name']}")


In [None]:
# Images
images = stash.find_images({ "tags": { "value": [to_be_merged_tag['id']], "modifier": "INCLUDES" }}, fragment="id title tags { id name }")
print(len(images))

In [13]:
# Update images
for image in images:
    image_id = image['id']
    current_image_tag_ids = [tag['id'] for tag in image['tags']]
    update_image_tag_ids = [tag_id for tag_id in current_image_tag_ids if tag_id != to_be_merged_tag['id']] + [target_tag['id']]
    stash.update_image({ "id": image_id, "tag_ids": update_image_tag_ids })
    print(f"Updated image {image_id} with tag {target_tag['name']}")

In [None]:
# Markers
markers = stash.find_scene_markers({ "tags": { "value": [to_be_merged_tag['id']], "modifier": "INCLUDES" }}, fragment="id scene { id title } title primary_tag { id name } tags { id name }")

# Make sure we only update markers that have primary_tag set to the to_be_merged_tag
markers = [marker for marker in markers if marker['primary_tag']['id'] == to_be_merged_tag['id']]
print(len(markers))

In [None]:
# Update markers
for marker in markers:
    marker_id = marker['id']
    current_marker_tag_ids = [tag['id'] for tag in marker['tags']]
    update_marker_tag_ids = [tag_id for tag_id in current_marker_tag_ids if tag_id != to_be_merged_tag['id']] + [target_tag['id']]
    stash.update_scene_marker({ "id": marker_id, "title": target_tag["name"], "primary_tag_id": target_tag['id'] })
    print(f"Updated marker {marker_id} with tag {target_tag['name']} for scene {marker['scene']['title']} (ID: {marker['scene']['id']})")


# Syncing tags from StashDB to Stash

In [2]:
stashdb_tags = stashbox_client.query_tags()


In [3]:
# Expand the category column into separate columns
df_stashdb_tags = pl.DataFrame(stashdb_tags)

df_stashdb_tags = df_stashdb_tags.with_columns(
    pl.col("category").map_elements(lambda x: x['id'] if x else None, return_dtype=pl.Utf8).alias("category_id"),
    pl.col("category").map_elements(lambda x: x['name'] if x else None, return_dtype=pl.Utf8).alias("category_name"),
    pl.col("category").map_elements(lambda x: x['description'] if x else None, return_dtype=pl.Utf8).alias("category_description"),
    pl.col("category").map_elements(lambda x: x['group'] if x else None, return_dtype=pl.Utf8).alias("category_group"),
).drop("category")

df_stashdb_tags


id,name,description,aliases,deleted,created,updated,category_id,category_name,category_description,category_group
str,str,str,list[str],bool,str,str,str,str,str,str
"""9441c3ad-41d2-4d6e-bc97-54ad8c…","""120 FPS""","""Scenes offered at 120 frames p…","[""120帧""]",false,"""2022-04-05T20:28:06Z""","""2024-02-17T18:36:12.991842Z""","""ef4ae6d1-d13c-4195-b47e-f245e4…","""Shot Type""","""Technical details of how a vid…","""SCENE"""
"""42d9e5c4-1a1d-4c93-bf47-9086f2…","""12K Available""","""Scenes offered in a resolution…","[""12K"", ""12K Shemale VR Porn"", … ""True 12K""]",false,"""2024-12-03T05:31:48.278753Z""","""2024-12-03T05:31:48.278753Z""","""7f4ddc1b-8169-4d5b-b764-04ad07…","""Misc""","""Information about the video it…","""SCENE"""
"""8534d108-1f4c-42f9-8caa-5ca906…","""18+""","""Primary performer (not charact…","[""18 Plus"", ""Over 18""]",false,"""2024-03-30T04:09:32.347616Z""","""2024-03-30T04:09:32.347616Z""","""b40e08dd-314e-40ca-8fdb-bf7541…","""Age Group""","""Implied age ranges for charact…","""PEOPLE"""
"""103a1f16-83e1-4b9f-ab14-e85e04…","""180°""","""Virtual reality scenes with a …","[""180"", ""180 FOV"", … ""VR180""]",false,"""2020-04-27T18:59:52Z""","""2023-05-25T09:25:21.314083Z""","""ef4ae6d1-d13c-4195-b47e-f245e4…","""Shot Type""","""Technical details of how a vid…","""SCENE"""
"""6cd87d98-eea8-4b97-9db9-aa38a9…","""1800s""","""Inspired by the history and cu…","[""1800's"", ""19th Century"", … ""Victorian""]",false,"""2024-02-15T10:25:01.839985Z""","""2024-02-15T10:25:01.839985Z""","""0319d5d6-a07f-4e0d-809d-c09fb1…","""Themes""","""Events, contexts, or fetishes …","""SCENE"""
…,…,…,…,…,…,…,…,…,…,…
"""e7f1f848-4350-4bda-925c-b01235…","""Young Man (22–30)""","""Male presented as generally yo…","[""Young Guy"", ""Young Male"", … ""青年男子 (22–30)""]",false,"""2020-04-27T18:59:52Z""","""2024-11-12T06:27:11.374593Z""","""b40e08dd-314e-40ca-8fdb-bf7541…","""Age Group""","""Implied age ranges for charact…","""PEOPLE"""
"""84ba8ef1-084c-46f8-b352-31154f…","""Young Woman (22–30)""","""Female character presented as …","[""Chick"", ""Woman (20-29)"", … ""Youthful Woman""]",false,"""2020-04-27T18:59:52Z""","""2024-11-14T00:15:52.456833Z""","""b40e08dd-314e-40ca-8fdb-bf7541…","""Age Group""","""Implied age ranges for charact…","""PEOPLE"""
"""6c0a2824-acd2-4b64-9a2c-634bd9…","""Zentai""","""Skin-tight garment that covers…","[""Zentai Suit""]",false,"""2022-07-10T22:17:47.537338Z""","""2022-07-10T22:17:47.537338Z""","""dc566ccc-0584-41d8-b9f5-4d8680…","""Clothing""","""Articles or styles of clothing…","""PEOPLE"""
"""bed78871-9bb8-40c2-97b1-347c43…","""Zip Front Dress""","""A dress where the zipper runs …","[""Zipper Dress""]",false,"""2023-10-23T23:37:34.546141Z""","""2023-10-23T23:37:34.546141Z""","""dc566ccc-0584-41d8-b9f5-4d8680…","""Clothing""","""Articles or styles of clothing…","""PEOPLE"""


In [5]:
df_stashdb_tags.write_json("H:\\Parquet Data\\StashDB\\stashdb_tags.json")

In [22]:
stash_tags = stash.find_tags()
df_stash_tags = pl.DataFrame(stash_tags)
df_stash_tags = df_stash_tags.with_columns(
    pl.col("aliases").map_elements(
        lambda aliases: next(
            (alias[len(stashdb_id_alias_prefix):] for alias in aliases if isinstance(alias, str) and alias.startswith(stashdb_id_alias_prefix)),
            None
        ),
        return_dtype=pl.Utf8
    ).alias("stashdb_id")
)
df_stash_tags

id,name,description,aliases,ignore_auto_tag,created_at,updated_at,favorite,image_path,scene_count,scene_marker_count,image_count,gallery_count,performer_count,studio_count,group_count,parents,children,parent_count,child_count,stashdb_id
str,str,str,list[str],bool,str,str,bool,str,i64,i64,i64,i64,i64,i64,i64,list[struct[1]],list[struct[1]],i64,i64,str
"""5045""","""2D Available""","""3D or VR scenes that offer a m…","[""2-D"", ""2D"", … ""Two-Dimensional""]",false,"""2024-04-23T12:50:49Z""","""2025-01-26T10:32:59+02:00""",false,"""http://localhost:6969/tag/5045…",1,0,0,0,0,0,0,"[{""7752""}]",[],1,0,"""1257be8b-d1ec-4cb1-bb22-beeb89…"
"""5049""","""3D Available""","""Offered in a format with a thr…","[""3-D"", ""3D"", … ""Three-Dimensional""]",false,"""2024-04-23T12:50:49Z""","""2025-01-26T10:32:59+02:00""",false,"""http://localhost:6969/tag/5049…",50,0,0,0,0,0,0,"[{""7752""}]",[],1,0,"""52992c2c-4617-4540-8ca4-291e9c…"
"""5050""","""3K Available""","""Scenes offered in a resolution…","[""1600p"", ""3K VP9"", … ""StashDB ID: c3794d99-1b5b-47b3-86f7-75ff2de748b8""]",false,"""2024-04-23T12:50:49Z""","""2025-01-26T10:32:59+02:00""",false,"""http://localhost:6969/tag/5050…",7,0,0,0,0,0,0,"[{""7752""}]",[],1,0,"""c3794d99-1b5b-47b3-86f7-75ff2d…"
"""5051""","""3rd Person Narrative""","""Features a storyline with fict…","[""3rd Person Perspective"", ""StashDB ID: f562975c-e209-464c-83ed-8ac18eb3a2e8"", ""Third Person Perspective""]",false,"""2024-04-23T12:50:49Z""","""2025-01-26T10:32:59+02:00""",false,"""http://localhost:6969/tag/5051…",47,0,0,15,0,0,0,"[{""7751""}]",[],1,0,"""f562975c-e209-464c-83ed-8ac18e…"
"""5053""","""4:3 Aspect Ratio""","""Footage shot in a 4:3 (1.33:1)…","[""1.33:1"", ""1.33:1 Aspect Ratio"", … ""StashDB ID: 6958c8ed-1948-46d2-89e0-cb48919bf8f1""]",false,"""2024-04-23T12:50:49Z""","""2025-01-26T10:32:59+02:00""",false,"""http://localhost:6969/tag/5053…",0,0,0,0,0,0,0,"[{""7749""}]",[],1,0,"""6958c8ed-1948-46d2-89e0-cb4891…"
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""7561""","""Youtuber""","""""",[],false,"""2024-04-23T12:52:46Z""","""2024-04-23T12:52:46Z""",false,"""http://localhost:6969/tag/7561…",0,0,0,0,0,0,0,[],[],0,0,
"""7562""","""Zapper""","""""",[],false,"""2024-04-23T12:52:46Z""","""2024-04-23T12:52:46Z""",false,"""http://localhost:6969/tag/7562…",182,0,0,0,0,0,0,[],[],0,0,
"""7563""","""Zentai""","""Skin-tight garment that covers…","[""StashDB ID: 6c0a2824-acd2-4b64-9a2c-634bd9e4d0d0"", ""Zentai Suit""]",false,"""2024-04-23T12:52:46Z""","""2025-01-26T10:33:27+02:00""",false,"""http://localhost:6969/tag/7563…",0,0,0,0,0,0,0,"[{""7762""}]",[],1,0,"""6c0a2824-acd2-4b64-9a2c-634bd9…"
"""7564""","""Zip Front Dress""","""A dress where the zipper runs …","[""StashDB ID: bed78871-9bb8-40c2-97b1-347c43ca7113"", ""Zipper Dress""]",false,"""2024-04-23T12:52:46Z""","""2025-01-26T10:33:27+02:00""",false,"""http://localhost:6969/tag/7564…",0,0,0,0,0,0,0,"[{""7762""}]",[],1,0,"""bed78871-9bb8-40c2-97b1-347c43…"


In [23]:
# Merge df_stashdb_tags and df_stash_tags based on the 'name' column
merged_df = df_stashdb_tags.join(df_stash_tags, left_on='id', right_on='stashdb_id', how='full', suffix='_stash')

# Identify matching and non-matching tags
matching_tags = merged_df.filter(pl.col('id').is_not_null() & pl.col('id_stash').is_not_null())
stashdb_only_tags = merged_df.filter(pl.col('id_stash').is_null())
stash_only_tags = merged_df.filter(pl.col('id').is_null())

# Display results
print(f"Total matching tags: {len(matching_tags)}")
print(f"Tags only in StashDB: {len(stashdb_only_tags)}")
print(f"Tags only in Stash: {len(stash_only_tags)}")

merged_df

Total matching tags: 2538
Tags only in StashDB: 218
Tags only in Stash: 348


id,name,description,aliases,deleted,created,updated,category_id,category_name,category_description,category_group,id_stash,name_stash,description_stash,aliases_stash,ignore_auto_tag,created_at,updated_at,favorite,image_path,scene_count,scene_marker_count,image_count,gallery_count,performer_count,studio_count,group_count,parents,children,parent_count,child_count,stashdb_id
str,str,str,list[str],bool,str,str,str,str,str,str,str,str,str,list[str],bool,str,str,bool,str,i64,i64,i64,i64,i64,i64,i64,list[struct[1]],list[struct[1]],i64,i64,str
"""1257be8b-d1ec-4cb1-bb22-beeb89…","""2D Available""","""3D or VR scenes that offer a m…","[""2-D"", ""2D"", … ""Two-Dimensional""]",false,"""2020-05-01T09:37:09Z""","""2022-02-22T21:51:53Z""","""7f4ddc1b-8169-4d5b-b764-04ad07…","""Misc""","""Information about the video it…","""SCENE""","""5045""","""2D Available""","""3D or VR scenes that offer a m…","[""2-D"", ""2D"", … ""Two-Dimensional""]",false,"""2024-04-23T12:50:49Z""","""2025-01-26T10:32:59+02:00""",false,"""http://localhost:6969/tag/5045…",1,0,0,0,0,0,0,"[{""7752""}]",[],1,0,"""1257be8b-d1ec-4cb1-bb22-beeb89…"
"""52992c2c-4617-4540-8ca4-291e9c…","""3D Available""","""Offered in a format with a thr…","[""3-D"", ""3D"", … ""Three-Dimensional""]",false,"""2020-04-27T18:59:52Z""","""2022-02-22T21:52:15Z""","""7f4ddc1b-8169-4d5b-b764-04ad07…","""Misc""","""Information about the video it…","""SCENE""","""5049""","""3D Available""","""Offered in a format with a thr…","[""3-D"", ""3D"", … ""Three-Dimensional""]",false,"""2024-04-23T12:50:49Z""","""2025-01-26T10:32:59+02:00""",false,"""http://localhost:6969/tag/5049…",50,0,0,0,0,0,0,"[{""7752""}]",[],1,0,"""52992c2c-4617-4540-8ca4-291e9c…"
"""c3794d99-1b5b-47b3-86f7-75ff2d…","""3K Available""","""Scenes offered in a resolution…","[""1600p"", ""3K VP9"", … ""3KVR""]",false,"""2023-02-06T23:54:49.304855Z""","""2023-02-06T23:54:49.304855Z""","""7f4ddc1b-8169-4d5b-b764-04ad07…","""Misc""","""Information about the video it…","""SCENE""","""5050""","""3K Available""","""Scenes offered in a resolution…","[""1600p"", ""3K VP9"", … ""StashDB ID: c3794d99-1b5b-47b3-86f7-75ff2de748b8""]",false,"""2024-04-23T12:50:49Z""","""2025-01-26T10:32:59+02:00""",false,"""http://localhost:6969/tag/5050…",7,0,0,0,0,0,0,"[{""7752""}]",[],1,0,"""c3794d99-1b5b-47b3-86f7-75ff2d…"
"""f562975c-e209-464c-83ed-8ac18e…","""3rd Person Narrative""","""Features a storyline with fict…","[""3rd Person Perspective"", ""Third Person Perspective"", ""第三者撮り""]",false,"""2021-02-02T16:02:36Z""","""2024-12-01T16:57:51.952057Z""","""0319d5d6-a07f-4e0d-809d-c09fb1…","""Themes""","""Events, contexts, or fetishes …","""SCENE""","""5051""","""3rd Person Narrative""","""Features a storyline with fict…","[""3rd Person Perspective"", ""StashDB ID: f562975c-e209-464c-83ed-8ac18eb3a2e8"", ""Third Person Perspective""]",false,"""2024-04-23T12:50:49Z""","""2025-01-26T10:32:59+02:00""",false,"""http://localhost:6969/tag/5051…",47,0,0,15,0,0,0,"[{""7751""}]",[],1,0,"""f562975c-e209-464c-83ed-8ac18e…"
"""6958c8ed-1948-46d2-89e0-cb4891…","""4:3 Aspect Ratio""","""Footage shot in a 4:3 (1.33:1)…","[""1.33:1"", ""1.33:1 Aspect Ratio"", … ""Fullscreen""]",false,"""2022-08-08T00:33:32.647805Z""","""2022-08-26T06:14:19.530426Z""","""ef4ae6d1-d13c-4195-b47e-f245e4…","""Shot Type""","""Technical details of how a vid…","""SCENE""","""5053""","""4:3 Aspect Ratio""","""Footage shot in a 4:3 (1.33:1)…","[""1.33:1"", ""1.33:1 Aspect Ratio"", … ""StashDB ID: 6958c8ed-1948-46d2-89e0-cb48919bf8f1""]",false,"""2024-04-23T12:50:49Z""","""2025-01-26T10:32:59+02:00""",false,"""http://localhost:6969/tag/5053…",0,0,0,0,0,0,0,"[{""7749""}]",[],1,0,"""6958c8ed-1948-46d2-89e0-cb4891…"
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""9c1d355d-e175-4fb7-b483-594220…","""German Language""","""Scene shows performer(s) speak…",[],false,"""2024-12-31T23:03:13.850936Z""","""2024-12-31T23:03:13.850936Z""","""7f4ddc1b-8169-4d5b-b764-04ad07…","""Misc""","""Information about the video it…","""SCENE""",,,,,,,,,,,,,,,,,,,,,
"""b836e27d-6418-4ce0-973b-f6f558…","""Capture""","""Performer takes and holds anot…","[""Captured""]",false,"""2024-12-10T03:32:53.367539Z""","""2024-12-10T03:32:53.367539Z""","""feca7511-ac91-42c0-a032-8fb8f3…","""Acts""","""Various sexual acts or positio…","""ACTION""",,,,,,,,,,,,,,,,,,,,,
"""c8b9473c-07a9-45af-980c-864669…","""Observer""","""A performer who observes, but …","[""Sex In Front of Others"", ""Watching"", ""Watching Others""]",false,"""2020-04-27T18:59:52.416662Z""","""2024-07-31T22:59:37.313842Z""","""c423ad76-53f3-45a2-a865-87ee19…","""Roles""","""Common character archetypes or…","""SCENE""",,,,,,,,,,,,,,,,,,,,,
"""38a92ca9-ef53-4c48-b8a3-dd0682…","""Dirty Ass to Mouth""","""Oral penetration of a performe…","[""DATM"", ""Dirty A2M"", … ""Messy Ass to Mouth""]",false,"""2020-05-01T09:37:09.35167Z""","""2024-10-31T20:04:14.281471Z""","""feca7511-ac91-42c0-a032-8fb8f3…","""Acts""","""Various sexual acts or positio…","""ACTION""",,,,,,,,,,,,,,,,,,,,,


# Create category groups

In [24]:
# Get all unique category groups from StashDB tags
category_groups = df_stashdb_tags.select('category_group').drop_nulls().unique().to_series().to_list()

# Display the category groups
print("Unique category groups in StashDB:")
for group in sorted(category_groups):
    print(f"- {group}")
    tag_name = f"Category Group: {group}"
    existing_tag = stash.find_tag(tag_name)
    if existing_tag is None:
        stash.create_tag({
            "name": tag_name,
            "description": f"StashDB category group: {group}",
        })
        print(f"Created tag: {tag_name}")
    else:
        print(f"Tag already exists: {tag_name}")

Unique category groups in StashDB:
- ACTION
Tag already exists: Category Group: ACTION
- PEOPLE
Tag already exists: Category Group: PEOPLE
- SCENE
Tag already exists: Category Group: SCENE


# Create categories

In [25]:
# Get all unique categories from StashDB tags
unique_categories = df_stashdb_tags.select(['category_name', 'category_group', 'category_description']).drop_nulls().unique()

# Display the unique categories
print("Unique categories in StashDB:")
for category in unique_categories.iter_rows(named=True):
    print(f"- Name: {category['category_name'] or 'N/A'}")
    print(f"  Group: {category['category_group'] or 'N/A'}")
    print(f"  Description: {category['category_description'] or 'N/A'}")
    print()

# Create tags for each unique category in Stash
for category in unique_categories.iter_rows(named=True):
    name = category['category_name']
    group = category['category_group']
    description = category['category_description']
    
    category_tag = stash.find_tag(f"Category: {name}")
    if category_tag is None:
        category_group_tag = stash.find_tag(f"Category Group: {group}")
        
        category_tag = stash.create_tag({
            "name": f"Category: {name}",
            "description": f"StashDB category: {name}",
            "parent_ids": [category_group_tag['id']] if category_group_tag else None,
        })
        print(f"Created category tag: {name}")


Unique categories in StashDB:
- Name: Body Type
  Group: PEOPLE
  Description: Various descriptions of body fat, body shape, body hair, or muscle tone.

- Name: Themes
  Group: SCENE
  Description: Events, contexts, or fetishes often featured in scenes.

- Name: Surfaces
  Group: SCENE
  Description: What the action takes place on top of, often furniture of some kind.

- Name: Roles
  Group: SCENE
  Description: Common character archetypes or personalities.

- Name: Face
  Group: PEOPLE
  Description: Various descriptions of a performer's facial features.

- Name: Age Group
  Group: PEOPLE
  Description: Implied age ranges for characters, not performers.

- Name: Race
  Group: PEOPLE
  Description: Broad categories for a performer's racial background.

- Name: Orientation
  Group: SCENE
  Description: Broad categories of sex/gender combinations, often aligning more closely to traditional marketing terms than real-world sexual orientations.

- Name: Motivations
  Group: SCENE
  Descript

# Update descriptions

In [26]:
# Create records of tags that need updates
description_update_records = []

for row in df_stash_tags.iter_rows(named=True):
    stash_tag_name = row['name']
    stashdb_tag = df_stashdb_tags.filter(pl.col('name') == stash_tag_name)
    
    if not stashdb_tag.is_empty():
        stashdb_tag = stashdb_tag.to_dicts()[0]
        
        # Check if description needs updating
        if stashdb_tag['description'] != row['description']:
            description_update_records.append({
                'tag_id': row['id'],
                'name': stash_tag_name,
                'field': 'description',
                'current_value': row['description'] or '',  # Handle None values
                'proposed_value': stashdb_tag['description'] or '',
            })
        
df_description_updates = pl.DataFrame(description_update_records).sort(['name', 'field']).filter(pl.col('current_value') != pl.col('proposed_value'))
df_description_updates

tag_id,name,field,current_value,proposed_value
str,str,str,str,str


In [None]:
for row in df_description_updates.iter_rows(named=True):
    print(row['name'])
    print(row['current_value'])
    print(row['proposed_value'])
    print()

    update_data = {
        "id": row['tag_id'],
        "description": row['proposed_value']
    }
    try:
        stash.update_tag(update_data)
        print(f"Updated tag: {row['name']}")
    except Exception as e:
        print(f"Error updating tag {row['name']}: {e}")

# Update aliases


In [27]:
# Create records of tags that need updates
alias_update_records = []

for row in df_stash_tags.iter_rows(named=True):
    stash_tag_name = row['name']
    stashdb_tag = df_stashdb_tags.filter(pl.col('name') == stash_tag_name)
    
    if not stashdb_tag.is_empty():
        stashdb_tag = stashdb_tag.to_dicts()[0]
        
        # Convert aliases to sets for comparison
        current_aliases = set(row['aliases']) if row['aliases'] else set()
        # Filter out aliases containing CJK characters
        proposed_aliases = {alias for alias in (stashdb_tag['aliases'] or []) 
                          if not contains_cjk(alias)}
        
        # Check if aliases need updating
        if current_aliases != proposed_aliases:
            # Calculate differences
            to_add = proposed_aliases - current_aliases
            to_remove = current_aliases - proposed_aliases
            
            # Only proceed if there are changes
            if to_add or to_remove:
                # Format difference string
                diff_parts = []
                if to_add:
                    diff_parts.append(f"+ {', '.join(sorted(to_add))}")
                if to_remove:
                    diff_parts.append(f"- {', '.join(sorted(to_remove))}")
                
                alias_update_records.append({
                    'tag_id': row['id'],
                    'name': stash_tag_name,
                    'current_aliases': ', '.join(sorted(current_aliases)),
                    'proposed_aliases': ', '.join(sorted(proposed_aliases)),
                    'differences': ' | '.join(diff_parts),
                    'current_list': sorted(current_aliases),
                    'proposed_list': sorted(proposed_aliases)
                })

# Create DataFrame and sort by name
df_alias_updates = pl.DataFrame(alias_update_records).sort('name')

# Print summary
print(f"Found {len(df_alias_updates)} tags with non-CJK alias updates")
print("\nSample of proposed updates:")
print(df_alias_updates.select(['name', 'current_aliases', 'proposed_aliases', 'differences']).head())

df_alias_updates

Found 2538 tags with non-CJK alias updates

Sample of proposed updates:
shape: (5, 4)
┌─────────┬─────────────────────────────────┬───────────────────────┬───────────────────┐
│ name    ┆ current_aliases                 ┆ proposed_aliases      ┆ differences       │
│ ---     ┆ ---                             ┆ ---                   ┆ ---               │
│ str     ┆ str                             ┆ str                   ┆ str               │
╞═════════╪═════════════════════════════════╪═══════════════════════╪═══════════════════╡
│ 120 FPS ┆ StashDB ID: 9441c3ad-41d2-4d6e… ┆                       ┆ - StashDB ID:     │
│         ┆                                 ┆                       ┆ 9441c3ad-41d2-4d… │
│ 18+     ┆ 18 Plus, Over 18, StashDB ID: … ┆ 18 Plus, Over 18      ┆ - StashDB ID:     │
│         ┆                                 ┆                       ┆ 8534d108-1f4c-42… │
│ 1800s   ┆ 1800's, 19th Century, American… ┆ 1800's, 19th Century, ┆ - StashDB ID:     │
│         ┆   

tag_id,name,current_aliases,proposed_aliases,differences,current_list,proposed_list
str,str,str,str,str,list[str],list[str]
"""5029""","""120 FPS""","""StashDB ID: 9441c3ad-41d2-4d6e…","""""","""- StashDB ID: 9441c3ad-41d2-4d…","[""StashDB ID: 9441c3ad-41d2-4d6e-bc97-54ad8cc227d5""]",[]
"""5030""","""18+""","""18 Plus, Over 18, StashDB ID: …","""18 Plus, Over 18""","""- StashDB ID: 8534d108-1f4c-42…","[""18 Plus"", ""Over 18"", ""StashDB ID: 8534d108-1f4c-42f9-8caa-5ca9064f560d""]","[""18 Plus"", ""Over 18""]"
"""5032""","""1800s""","""1800's, 19th Century, American…","""1800's, 19th Century, American…","""- StashDB ID: 6cd87d98-eea8-4b…","[""1800's"", ""19th Century"", … ""Victorian""]","[""1800's"", ""19th Century"", … ""Victorian""]"
"""5031""","""180°""","""180, 180 FOV, 180 VR, 180VR, 1…","""180, 180 FOV, 180 VR, 180VR, 1…","""- StashDB ID: 103a1f16-83e1-4b…","[""180"", ""180 FOV"", … ""VR180""]","[""180"", ""180 FOV"", … ""VR180""]"
"""5033""","""190°""","""190, RF52, StashDB ID: e3338b1…","""190, RF52""","""- StashDB ID: e3338b1d-63f8-42…","[""190"", ""RF52"", ""StashDB ID: e3338b1d-63f8-42ea-b617-37b0acf0eea5""]","[""190"", ""RF52""]"
…,…,…,…,…,…,…
"""7559""","""Young Man (22–30)""","""StashDB ID: e7f1f848-4350-4bda…","""Young Guy, Young Male, Young M…","""- StashDB ID: e7f1f848-4350-4b…","[""StashDB ID: e7f1f848-4350-4bda-925c-b0123521b4de"", ""Young Guy"", … ""Youthful Man""]","[""Young Guy"", ""Young Male"", … ""Youthful Man""]"
"""7560""","""Young Woman (22–30)""","""Chick, StashDB ID: 84ba8ef1-08…","""Chick, Woman (20-29), Young Ch…","""- StashDB ID: 84ba8ef1-084c-46…","[""Chick"", ""StashDB ID: 84ba8ef1-084c-46f8-b352-31154f5bfbbc"", … ""Youthful Woman""]","[""Chick"", ""Woman (20-29)"", … ""Youthful Woman""]"
"""7563""","""Zentai""","""StashDB ID: 6c0a2824-acd2-4b64…","""Zentai Suit""","""- StashDB ID: 6c0a2824-acd2-4b…","[""StashDB ID: 6c0a2824-acd2-4b64-9a2c-634bd9e4d0d0"", ""Zentai Suit""]","[""Zentai Suit""]"
"""7564""","""Zip Front Dress""","""StashDB ID: bed78871-9bb8-40c2…","""Zipper Dress""","""- StashDB ID: bed78871-9bb8-40…","[""StashDB ID: bed78871-9bb8-40c2-97b1-347c43ca7113"", ""Zipper Dress""]","[""Zipper Dress""]"


In [None]:
for row in df_alias_updates.iter_rows(named=True):
    update_data = {
        "id": row['tag_id'],
        "aliases": row['proposed_list']
    }
    try:
        stash.update_tag(update_data)
        print(f"Updated tag: {row['name']}")
    except Exception as e:
        print(f"Error updating tag {row['name']}: {e}")

# Clean out the CJK aliases from existing tags

In [None]:
# First add a column with cleaned aliases
df_stash_tags = df_stash_tags.with_columns(
    pl.col('aliases').map_elements(lambda x: [alias for alias in x if not contains_cjk(alias)], return_dtype=pl.List(pl.Utf8)).alias('cleaned_aliases')
)

# Find tags where current aliases differ from cleaned aliases
tags_to_update = df_stash_tags.filter(pl.col('aliases') != pl.col('cleaned_aliases'))

print(f"Found {len(tags_to_update)} tags with CJK aliases to remove")
print("\nSample of changes to make:")
print(tags_to_update.select([
    'name',
    'aliases',
    'cleaned_aliases'
]).head())

# Optional: Apply the updates
def apply_alias_cleanup(tags_df):
    for row in tags_df.iter_rows(named=True):
        update_data = {
            'id': row['id'],
            'aliases': row['cleaned_aliases']
        }
        
        try:
            stash.update_tag(update_data)
            print(f"Updated aliases for {row['name']}")
        except Exception as e:
            print(f"Error updating {row['name']}: {e}")

# Uncomment to apply the updates:
apply_alias_cleanup(tags_to_update)

tags_to_update_for_review = tags_to_update.select(['name', 'aliases', 'cleaned_aliases'])
tags_to_update_for_review


In [None]:
# Create tags in Stash which exist in StashDB but not in Stash
stashdb_only_tags = df_stashdb_tags.filter(~pl.col('name').is_in(df_stash_tags.select('name').to_series()))

print(f"Number of tags in StashDB but not in Stash: {len(stashdb_only_tags)}")

for stashdb_tag in stashdb_only_tags.iter_rows(named=True):
    # Check if the tag already exists in Stash
    existing_tag = stash.find_tag(stashdb_tag['name'])
    if existing_tag:
        # Check if the tag exists due to an alias
        if stashdb_tag['name'] in existing_tag['aliases']:
            print(f"Tag already exists due to alias: {stashdb_tag['name']}")
        else:
            print(f"Tag already exists: {stashdb_tag['name']}")
        continue
    
    # Find the category tag if it exists
    category_tag = None
    if stashdb_tag['category_name']:
        category_tag = stash.find_tag(f"Category: {stashdb_tag['category_name']}")
    
    # Prepare the tag data
    tag_data = {
        "name": stashdb_tag['name'],
        "description": stashdb_tag['description'],
    }
    
    # Add aliases if they exist
    if stashdb_tag['aliases']:
        tag_data["aliases"] = stashdb_tag['aliases']
    
    # Add parent category if it exists
    if category_tag:
        tag_data["parent_ids"] = [category_tag['id']]
    
    # Create the tag in Stash
    try:
        # new_tag = stash.create_tag(tag_data)
        # print(f"Created tag: {new_tag['name']}")
        print(tag_data)
    except Exception as e:
        print(f"Error creating tag: {e}")

print(f"Created {len(stashdb_only_tags)} new tags in Stash.")


# Legacy below

In [None]:
# Create tags in Stash which exist in StashDB but not in Stash
stashdb_only_tags = df_stashdb_tags[~df_stashdb_tags['name'].isin(df_stash_tags['name'])]

print(f"Number of tags in StashDB but not in Stash: {len(stashdb_only_tags)}")

for _, stashdb_tag in stashdb_only_tags.iterrows():
    # Check if the tag already exists in Stash
    existing_tag = stash.find_tag(stashdb_tag['name'])
    if existing_tag:
        print(f"Tag already exists: {stashdb_tag['name']}")
        continue
    
    # Find the category tag if it exists
    category_tag = None
    if pd.notna(stashdb_tag['category_name']):
        category_tag = stash.find_tag(f"Category: {stashdb_tag['category_name']}")
    
    # Prepare the tag data
    tag_data = {
        "name": stashdb_tag['name'],
        "description": stashdb_tag['description'],
    }
    
    # Add aliases if they exist
    if pd.notna(stashdb_tag['aliases']).any() and len(stashdb_tag['aliases']) > 0:
        tag_data["aliases"] = stashdb_tag['aliases']
    
    # Add parent category if it exists
    if category_tag:
        tag_data["parent_ids"] = [category_tag['id']]
    
    # Create the tag in Stash
    try:
        new_tag = stash.create_tag(tag_data)
        print(f"Created tag: {new_tag['name']}")
    except Exception as e:
        print(f"Error creating tag: {e}")

print(f"Created {len(stashdb_only_tags)} new tags in Stash.")


In [None]:
for _, row in merged_df.iterrows():
    stash_tag_id = row['id_stash']
    stashdb_category_name = row['category_name']
    if pd.notna(stashdb_category_name):
        category_tag = stash.find_tag(f"Category: {stashdb_category_name}")
        if category_tag:
            if pd.notna(stash_tag_id):
                try:
                    refreshed_tag = stash.find_tag(int(stash_tag_id))
                    if refreshed_tag and "parents" in refreshed_tag:
                        existing_parent_ids = [parent['id'] for parent in refreshed_tag["parents"] if parent is not None]
                        if category_tag['id'] not in existing_parent_ids:
                            new_parent_ids = existing_parent_ids + [category_tag['id']]
                            stash.update_tag({ 
                                "id": int(stash_tag_id), 
                                "description": row['description_stashdb'],
                                "parent_ids": new_parent_ids
                            })
                            print(f"Updated tag: {row['name']}, applied category: {stashdb_category_name}")
                    else:
                        print(f"Warning: Could not refresh tag with ID {stash_tag_id}")
                except ValueError:
                    print(f"Warning: Invalid tag ID {stash_tag_id} for tag {row['name']}")
            else:
                print(f"Warning: Missing tag ID for tag {row['name']}")


In [None]:
# Create all tags in Stash which are in stashdb_only_tags
for _, stashdb_tag in stashdb_only_tags.iterrows():
    stash.create_tag({
        "name": stashdb_tag['name'],
        "description": stashdb_tag['description'],
        "aliases": stashdb_tag['aliases'],
        "category": stashdb_tag['category_name']
    })

print(f"Created {len(stashdb_only_tags)} new tags in Stash.")