In [1]:
import sys
from pathlib import Path

import polars as pl


sys.path.append(str(Path.cwd().parent))

from libraries.client_stashapp import StashAppClient, get_stashapp_client


stash = get_stashapp_client("MISSING_SDB_")
stash_client = StashAppClient("MISSING_SDB_")

dUsing stash (v0.28.1-89-g642b0f22) endpoint at http://localhost:6968/graphql
dUsing stash (v0.28.1-89-g642b0f22) endpoint at http://localhost:6968/graphql


In [None]:
all_missing_scenes = stash.find_scenes(fragment="id title stash_ids { endpoint stash_id }")

In [5]:
# Verify data retrieval
print(f"Total scenes retrieved: {len(all_missing_scenes)}")
print(f"Sample scene: {all_missing_scenes[0] if all_missing_scenes else 'No scenes found'}")

# Show first few scenes with stash IDs to understand the structure
scenes_with_stash_ids = [scene for scene in all_missing_scenes if scene.get("stash_ids")]
print(f"Scenes with stash IDs: {len(scenes_with_stash_ids)}")
if scenes_with_stash_ids:
    print(f"Sample scene with stash IDs: {scenes_with_stash_ids[0]}")


Total scenes retrieved: 23623
Sample scene: {'id': '2985', 'title': "Rocco's 3-Slut Anal & Fetish Fantasy", 'stash_ids': [{'endpoint': 'https://stashdb.org/graphql', 'stash_id': '35d03082-79d4-4fee-87f8-95bc2573a6dc'}]}
Scenes with stash IDs: 23623
Sample scene with stash IDs: {'id': '2985', 'title': "Rocco's 3-Slut Anal & Fetish Fantasy", 'stash_ids': [{'endpoint': 'https://stashdb.org/graphql', 'stash_id': '35d03082-79d4-4fee-87f8-95bc2573a6dc'}]}


In [6]:
# Create a flat list of all stash IDs with their associated scene information
stash_id_records = []

for scene in all_missing_scenes:
    scene_id = scene.get("id")
    scene_title = scene.get("title", "No Title")
    stash_ids = scene.get("stash_ids", [])

    for stash_id_obj in stash_ids:
        stash_id_records.append({
            "scene_id": scene_id,
            "scene_title": scene_title,
            "stash_id": stash_id_obj.get("stash_id"),
            "endpoint": stash_id_obj.get("endpoint")
        })

print(f"Total stash ID records: {len(stash_id_records)}")

# Create polars DataFrame
df_stash_ids = pl.DataFrame(stash_id_records)
print(f"DataFrame shape: {df_stash_ids.shape}")

df_stash_ids.head(10)


Total stash ID records: 23623
DataFrame shape: (23623, 4)


scene_id,scene_title,stash_id,endpoint
str,str,str,str
"""2985""","""Rocco's 3-Slut Anal & Fetish F…","""35d03082-79d4-4fee-87f8-95bc25…","""https://stashdb.org/graphql"""
"""5035""","""Slutty Girls Love Rocco""","""50b71b8c-6523-4eb9-97b2-00498f…","""https://stashdb.org/graphql"""
"""5533""","""2 Threesomes: Anal, DP, Bi Gir…","""afe3ba6c-7af8-4d03-b92a-39f5f5…","""https://stashdb.org/graphql"""
"""7914""","""Rocco's Intimate Initiations #…","""21dccfcf-fa6e-467f-bf1b-3dd84b…","""https://stashdb.org/graphql"""
"""7982""","""Rocco's Perfect Slaves #05""","""3f3a6368-7020-464e-aa28-51cec7…","""https://stashdb.org/graphql"""
"""11925""","""BONUS-Slutty Girls Love Rocco …","""3dbaa1a2-d060-49ae-8d31-e2d2c9…","""https://stashdb.org/graphql"""
"""11935""","""Rocco's Abbondanza:Big, Natura…","""527596c8-5e83-4c39-8cb8-2d2f5e…","""https://stashdb.org/graphql"""
"""12303""","""Anna, Mini & Rocco's Anal Thre…","""8c3cf4b1-8fde-4793-8e26-ce1af2…","""https://stashdb.org/graphql"""
"""12610""","""BONUS-Slutty Girls Love Rocco …","""3dbaa1a2-d060-49ae-8d31-e2d2c9…","""https://stashdb.org/graphql"""
"""12620""","""Rocco's Abbondanza:Big, Natura…","""527596c8-5e83-4c39-8cb8-2d2f5e…","""https://stashdb.org/graphql"""


In [13]:
# Find duplicate stash IDs
# Group by stash_id and endpoint, then count occurrences
duplicate_stash_ids = (
    df_stash_ids
    .group_by(["stash_id", "endpoint"])
    .agg([
        pl.len().alias("scene_count"),
        pl.col("scene_id").alias("scene_ids"),
        pl.col("scene_title").alias("scene_titles")
    ])
    .filter(pl.col("scene_count") > 1)
    .sort("scene_count", descending=True)
)

print(f"Found {len(duplicate_stash_ids)} duplicate stash_id/endpoint combinations")
print(f"Total scenes affected: {duplicate_stash_ids.select(pl.col('scene_count').len()).item()}")

duplicate_stash_ids


Found 470 duplicate stash_id/endpoint combinations
Total scenes affected: 470


stash_id,endpoint,scene_count,scene_ids,scene_titles
str,str,u32,list[str],list[str]
"""554aaa2c-06cd-4b24-bc54-824cea…","""https://stashdb.org/graphql""",4,"[""7587"", ""15010"", … ""28170""]","[""Master of Polygamy"", ""Master of Polygamy"", … ""Master of Polygamy""]"
"""3bdf233f-cb0d-4088-819d-8d2c79…","""https://stashdb.org/graphql""",4,"[""2408"", ""4833"", … ""23571""]","[""Cum loving bitches"", ""Cum loving bitches"", … ""Cum loving bitches""]"
"""8e75e43f-cc86-4868-af3b-26501e…","""https://stashdb.org/graphql""",3,"[""2389"", ""4795"", ""16670""]","[""Episode 9 - Playing With The Pros 2"", ""Episode 9 - Playing With The Pros 2"", ""Episode 9 - Playing With The Pros 2""]"
"""f4d7c980-ec23-45a1-b533-b3dbbc…","""https://stashdb.org/graphql""",3,"[""21624"", ""23906"", ""26708""]","[""Practice Safe Distancing"", ""Practice Safe Distancing"", ""Practice Safe Distancing""]"
"""3e89877e-b5f1-48f5-bdaa-d27713…","""https://stashdb.org/graphql""",3,"[""2398"", ""4804"", ""16677""]","[""Episode 1 - Battle Of The Sexes"", ""Episode 1 - Battle Of The Sexes"", ""Episode 1 - Battle Of The Sexes""]"
…,…,…,…,…
"""13e45463-b6c2-4bf5-a405-5e9293…","""https://stashdb.org/graphql""",2,"[""10022"", ""25723""]","[""Sorority Girls & The Dean 2"", ""Sorority Girls & The Dean 2""]"
"""9ef73e1f-6ac7-4071-ab0a-d7c0cc…","""https://stashdb.org/graphql""",2,"[""11938"", ""12623""]","[""LOL-014"", ""LOL-014""]"
"""bd77ec56-39f5-44f0-9444-382d1c…","""https://stashdb.org/graphql""",2,"[""11871"", ""12556""]","[""That video finally started Abigail Johnson lifted the ban"", ""That video finally started Abigail Johnson lifted the ban""]"
"""3aafd08f-d6e6-44bf-88eb-a81f66…","""https://stashdb.org/graphql""",2,"[""11974"", ""12659""]","[""SpunkyBee"", ""SpunkyBee""]"


In [14]:
# Delete duplicate scenes, keeping the first scene in each group
scenes_to_delete = []
scenes_to_keep = []

# Iterate through each duplicate group
for row in duplicate_stash_ids.iter_rows(named=True):
    scene_ids = row["scene_ids"]
    scene_count = row["scene_count"]
    stash_id = row["stash_id"]

    # Keep the first scene, mark others for deletion
    keep_scene = scene_ids[0]
    delete_scenes = scene_ids[1:]

    scenes_to_keep.append(keep_scene)
    scenes_to_delete.extend(delete_scenes)

    print(f"Stash ID: {stash_id[:8]}... - Keeping scene {keep_scene}, deleting scenes: {delete_scenes}")

print("\nSummary:")
print(f"Total scenes to keep: {len(scenes_to_keep)}")
print(f"Total scenes to delete: {len(scenes_to_delete)}")
print(f"Total duplicate groups processed: {len(duplicate_stash_ids)}")

# Show the first few scenes to be deleted for verification
print(f"\nFirst 10 scenes to be deleted: {scenes_to_delete[:10]}")


Stash ID: 554aaa2c... - Keeping scene 7587, deleting scenes: ['15010', '18994', '28170']
Stash ID: 3bdf233f... - Keeping scene 2408, deleting scenes: ['4833', '7995', '23571']
Stash ID: 8e75e43f... - Keeping scene 2389, deleting scenes: ['4795', '16670']
Stash ID: f4d7c980... - Keeping scene 21624, deleting scenes: ['23906', '26708']
Stash ID: 3e89877e... - Keeping scene 2398, deleting scenes: ['4804', '16677']
Stash ID: 20022766... - Keeping scene 2392, deleting scenes: ['4798', '16673']
Stash ID: 7fc753c9... - Keeping scene 14098, deleting scenes: ['22145', '22878']
Stash ID: 27e55214... - Keeping scene 2431, deleting scenes: ['11882', '12567']
Stash ID: 0b982c26... - Keeping scene 2395, deleting scenes: ['4801', '16676']
Stash ID: b5cdf343... - Keeping scene 2388, deleting scenes: ['4794', '16669']
Stash ID: 9841ee75... - Keeping scene 2394, deleting scenes: ['4800', '16675']
Stash ID: ffaddebc... - Keeping scene 2941, deleting scenes: ['8420', '10706']
Stash ID: 9a1e774f... - Keepi

In [16]:
# APPLY STEP - Execute the deletions
# WARNING: This will permanently delete scenes from your database!
# Make sure you have a backup if needed

import time


deleted_scenes = []
failed_deletions = []

print(f"Starting deletion of {len(scenes_to_delete)} duplicate scenes...")
print("This may take a while...")

for i, scene_id in enumerate(scenes_to_delete):
    try:
        # Delete the scene
        result = stash.destroy_scene(scene_id)
        deleted_scenes.append(scene_id)

        if (i + 1) % 50 == 0:  # Progress update every 50 deletions
            print(f"Processed {i + 1}/{len(scenes_to_delete)} scenes...")

        # Small delay to avoid overwhelming the server
        time.sleep(0.1)

    except Exception as e:
        print(f"Failed to delete scene {scene_id}: {e!s}")
        failed_deletions.append({"scene_id": scene_id, "error": str(e)})

print("\n=== DELETION COMPLETE ===")
print(f"Successfully deleted: {len(deleted_scenes)} scenes")
print(f"Failed deletions: {len(failed_deletions)} scenes")

if failed_deletions:
    print("\nFailed deletions:")
    for failure in failed_deletions:
        print(f"  Scene {failure['scene_id']}: {failure['error']}")

print(f"\nTotal scenes that should remain: {len(scenes_to_keep)}")
print(f"Original duplicate groups: {len(duplicate_stash_ids)}")
print(f"Scenes deleted: {len(deleted_scenes)}")
print(f"Expected reduction: {len(scenes_to_delete)} scenes")


Starting deletion of 492 duplicate scenes...
This may take a while...
Processed 50/492 scenes...
Processed 100/492 scenes...
Processed 150/492 scenes...
Processed 200/492 scenes...
Processed 250/492 scenes...
Processed 300/492 scenes...
Processed 350/492 scenes...
Processed 400/492 scenes...
Processed 450/492 scenes...

=== DELETION COMPLETE ===
Successfully deleted: 492 scenes
Failed deletions: 0 scenes

Total scenes that should remain: 470
Original duplicate groups: 470
Scenes deleted: 492
Expected reduction: 492 scenes
