In [None]:
%pip install psycopg2-binary
%pip install sqlalchemy

In [12]:
import psycopg
import pandas as pd

from dotenv import load_dotenv
import os

# Load the .env file so the CE_DB_* variables below can be read from os.environ
load_dotenv()

# Each value is None when the corresponding variable is not set.
user = os.environ.get("CE_DB_USERNAME")
pw = os.environ.get("CE_DB_PASSWORD")
host = os.environ.get("CE_DB_HOST")
port = os.environ.get("CE_DB_PORT")
db = os.environ.get("CE_DB_NAME")

# Build the libpq connection string with psycopg's own helper instead of an
# f-string: it quotes/escapes values containing spaces, quotes or backslashes
# (e.g. a password) and omits parameters whose value is None rather than
# writing the literal text "None" into the string.
from psycopg.conninfo import make_conninfo

connection_string = make_conninfo(dbname=db, user=user, password=pw, host=host, port=port)

def hex_to_binary(hex_string):
    """Convert a hexadecimal string to its binary digits as text.

    The result is left-padded with zeros to at least 64 characters; longer
    values are returned at their natural length.
    """
    value = int(hex_string, 16)
    digits = bin(value)[2:]  # strip the leading "0b"
    return digits.zfill(64)

def calculate_hamming_distance(phash1, phash2):
    """Count the bit positions at which two hexadecimal phashes differ.

    Both hashes are expanded with ``hex_to_binary`` and compared character by
    character.

    Raises:
        ValueError: if the two binary expansions have different lengths.
    """
    bits_a, bits_b = hex_to_binary(phash1), hex_to_binary(phash2)

    # A positional comparison only makes sense for same-length bit strings
    if len(bits_a) != len(bits_b):
        raise ValueError("Binary strings must be of equal length")

    differing = 0
    for left, right in zip(bits_a, bits_b):
        if left != right:
            differing += 1
    return differing

# Example usage:
# phash1 = "951428607cf7cb8f"
# phash2 = "951428607cf7cb8e"
# distance = calculate_hamming_distance(phash1, phash2)
# print(f"Hamming distance between {phash1} and {phash2}: {distance}")

def levenshtein(s1, s2):
    """Return ``Levenshtein.distance(s1, s2)``.

    The third-party ``Levenshtein`` package is imported on first call, so the
    notebook can be loaded without it as long as this function is not used.
    """
    import Levenshtein

    return Levenshtein.distance(s1, s2)


In [57]:
# Load every row of the `sites` table, ordered by name, into df_sites.
query = "SELECT * FROM sites ORDER BY name"

with psycopg.connect(connection_string) as conn:
    # The cursor context manager closes the cursor even when the query raises.
    with conn.cursor() as cursor:
        cursor.execute(query)

        # Fetch all the results
        results = cursor.fetchall()

        # Element 0 of each cursor.description entry is the column name
        column_names = [desc[0] for desc in cursor.description]

# The rows are already in memory, so the frame is built after the connection is released.
df_sites = pd.DataFrame(results, columns=column_names)


In [66]:
# Releases and their downloads for a fixed list of sites.
# COALESCE/NULLIF replaces a '-infinity' release_date with 1970-01-01.
# NOTE(review): both sites.name and releases.name are selected without aliases,
# so the resulting DataFrame has two columns labelled "name" — consider aliasing
# them once the consumers of this frame are known.
query = """
    SELECT sites.name,
           releases.uuid AS release_uuid,
           COALESCE(NULLIF(releases.release_date, '-infinity'), '1970-01-01'::date) as release_date,
           releases.short_name,
           releases.name,
           releases.url,
           downloads.uuid AS downloads_uuid,
           downloads.file_type,
           downloads.content_type,
           downloads.saved_filename,
           downloads.file_metadata
    FROM releases
    JOIN sites ON releases.site_uuid = sites.uuid
    JOIN downloads ON releases.uuid = downloads.release_uuid
    WHERE sites.name IN ('Kelly Madison', 'PornFidelity', 'TeenFidelity', 'Viv Thomas')
    ORDER BY sites.name, releases.release_date
"""

with psycopg.connect(connection_string) as conn:
    # The cursor context manager closes the cursor even when the query raises.
    with conn.cursor() as cursor:
        cursor.execute(query)

        # Fetch all the results
        results = cursor.fetchall()

        # Element 0 of each cursor.description entry is the column name
        column_names = [desc[0] for desc in cursor.description]

# Convert the results to a data frame with column names
df_releases_of_specific_sites = pd.DataFrame(results, columns=column_names)

# Pull the sha256Sum entry out of file_metadata when it is a dict that has one
df_releases_of_specific_sites['sha256'] = df_releases_of_specific_sites['file_metadata'].apply(
    lambda x: x['sha256Sum'] if isinstance(x, dict) and 'sha256Sum' in x else None
)


In [14]:
# Load every sub-site together with the name of the site it belongs to.
query = """
    SELECT sub_sites.*, sites.name AS site_name
    FROM sub_sites
    JOIN sites ON sub_sites.site_uuid = sites.uuid
    ORDER BY sub_sites.name
"""

with psycopg.connect(connection_string) as conn:
    # The cursor context manager closes the cursor even when the query raises.
    with conn.cursor() as cursor:
        cursor.execute(query)

        # Fetch all the results
        results = cursor.fetchall()

        # Element 0 of each cursor.description entry is the column name
        column_names = [desc[0] for desc in cursor.description]

# Convert the results to a data frame with column names
df_subsites = pd.DataFrame(results, columns=column_names)


In [15]:
from libraries.client_stashapp import get_stashapp_client

# Stash API client used by the cells below (find_studios, find_scenes, update_scene, ...)
stash = get_stashapp_client()

def get_parent_studio_id(studio):
    """Return the id of a studio's parent studio, or None when it has none.

    Args:
        studio: a mapping-like studio record (a dict or a DataFrame row) that
            may carry a ``parent_studio`` entry.

    Returns:
        ``studio["parent_studio"]["id"]`` when ``parent_studio`` is a dict;
        otherwise None. This covers a missing key, an explicit None, and the
        NaN that pandas substitutes for absent values.
    """
    parent_studio = studio.get("parent_studio")
    if isinstance(parent_studio, dict):
        return parent_studio["id"]
    return None


dUsing stash (v0.27.2-8-g2992e155) endpoint at http://localhost:6969/graphql
dPersisting Connection to Stash with ApiKey...


In [16]:
# Fetch the studios from Stash (empty filter) and tabulate them
studios = stash.find_studios({})
df_stash_studios = pd.DataFrame(studios)

# Row by row, extract the parent studio's id (or None) into its own column
parent_studio_ids = df_stash_studios.apply(get_parent_studio_id, axis=1)
df_stash_studios["parent_studio_id"] = parent_studio_ids

# Lookup functions
def get_studio_by_id(studio_id):
    """Return the rows of ``df_stash_studios`` whose ``id`` equals ``studio_id`` (possibly none)."""
    id_matches = df_stash_studios["id"] == studio_id
    return df_stash_studios[id_matches]

def get_studio_by_name(studio_name):
    """Return the rows of ``df_stash_studios`` whose ``name`` equals ``studio_name`` (possibly none)."""
    name_matches = df_stash_studios["name"] == studio_name
    return df_stash_studios[name_matches]

In [17]:
# Prefix every column with its source system so the names stay distinct after the join
df_stash_studios_prefixed = df_stash_studios.add_prefix('stash_')
df_sites_prefixed = df_sites.add_prefix('culture_extractor_')

# Inner join: keep only Stash studios whose name equals a Culture Extractor site name
df_matched_studios = df_stash_studios_prefixed.merge(
    df_sites_prefixed,
    how='inner',
    left_on='stash_name',
    right_on='culture_extractor_name',
)

In [18]:
# Store Culture Extractor UUID in Stash studio
name = "SexArt"

df_matched_studio = df_matched_studios[df_matched_studios["stash_name"] == name]
if df_matched_studio.empty:
    print(f"No studio found with name: {name}")
    raise ValueError(f"No studio found with name: {name}")
# From here on df_matched_studio is the first matching row (a Series), not a frame
df_matched_studio = df_matched_studio.iloc[0]

# Re-read the studio from Stash so its current stash_ids list is the one that gets edited
refreshed_studio = stash.find_studio(name)
assert refreshed_studio is not None, f"No studio found with name: {name}"

culture_extractor_endpoint = "https://culture.extractor/graphql"
culture_extractor_uuid = str(df_matched_studio["culture_extractor_uuid"])
stashbox_ids = refreshed_studio["stash_ids"]

# First stash_ids entry that already points at the Culture Extractor endpoint, if any
existing_stash_id = next(
    (entry for entry in stashbox_ids if entry["endpoint"] == culture_extractor_endpoint),
    None,
)

if not existing_stash_id:
    # No entry yet: add one and save
    stashbox_ids.append({"endpoint": culture_extractor_endpoint, "stash_id": culture_extractor_uuid})
    stash.update_studio({"id": refreshed_studio["id"], "stash_ids": stashbox_ids})
    print(f"Added new stash_id for {culture_extractor_endpoint}")
elif existing_stash_id["stash_id"] != culture_extractor_uuid:
    # Entry exists but holds a different id: overwrite it in place and save
    existing_stash_id["stash_id"] = culture_extractor_uuid
    stash.update_studio({"id": refreshed_studio["id"], "stash_ids": stashbox_ids})
    print(f"Updated stash_id for {culture_extractor_endpoint}")


dmatched "SexArt" to "SexArt" (5) using primary name


In [53]:
# Get studio for scene matching
stash_site_name = 'Slayed'

current_studio = get_studio_by_name(stash_site_name)

# Collapse a non-empty lookup result to a plain dict of its first row;
# an empty result stays a DataFrame and fails the assertion below.
if isinstance(current_studio, pd.DataFrame) and not current_studio.empty:
    current_studio = current_studio.iloc[0].to_dict()

culture_extractor_site_uuid = None
if isinstance(current_studio, dict) and "stash_ids" in current_studio:
    # stash_id of the first entry registered for the Culture Extractor endpoint
    culture_extractor_site_uuid = next(
        (
            entry.get("stash_id")
            for entry in current_studio["stash_ids"]
            if isinstance(entry, dict) and entry.get("endpoint") == "https://culture.extractor/graphql"
        ),
        None,
    )

assert culture_extractor_site_uuid is not None, f"No Culture Extractor site uuid found for {stash_site_name}"
print(f"Matched Stash studio {stash_site_name} to Culture Extractor site uuid {culture_extractor_site_uuid}")


Matched Stash studio Slayed to Culture Extractor site uuid 018c0784-4977-7064-8a5c-09b0b2c73f70


In [130]:
# WARNING: destructive cell — deletes the release row with the hard-coded uuid below.
# NOTE(review): per the psycopg 3 docs the connection context manager commits on a
# clean exit, so the delete takes effect as soon as this cell finishes — confirm
# before re-running the whole notebook.
delete_uuid = "018b8e83-e2e3-718e-966d-c4f745149c79"

with psycopg.connect(connection_string) as conn:
    # The cursor context manager closes the cursor even when the statement raises.
    with conn.cursor() as cursor:
        cursor.execute("DELETE FROM releases WHERE uuid = %s", (delete_uuid,))
        # Report the affected row count so a no-op (uuid not present) is visible
        print(f"Deleted {cursor.rowcount} release row(s) with uuid {delete_uuid}")


In [54]:
# Get scenes from Culture Extractor
# The site uuid is passed as a bound parameter (%s placeholder + params tuple)
# instead of being formatted into the SQL text, so a malformed or hostile value
# cannot change the statement.
query_template = """
    SELECT
        sites.uuid AS site_uuid,
        sites.short_name AS site_short_name,
        sites.name AS site_name,

        releases.uuid AS release_uuid,
        releases.release_date AS release_date,
        releases.short_name AS release_short_name,
        releases.name AS release_name,
        releases.url AS release_url,
        releases.json_document AS release_json_document,
        downloads.uuid AS downloads_uuid,
        downloads.downloaded_at AS downloads_downloaded_at,
        downloads.variant AS downloads_variant,
        downloads.file_type AS downloads_file_type,
        downloads.content_type AS downloads_content_type,
        downloads.file_metadata AS downloads_file_metadata
    FROM releases
    JOIN sites ON releases.site_uuid = sites.uuid
    JOIN downloads ON releases.uuid = downloads.release_uuid
    WHERE
        sites.uuid = %s AND
        downloads.file_type = 'video' AND
        downloads.content_type = 'scene' AND
        (downloads.variant NOT IN ('480x270', '270p', '320p', '360p', '270p MOBILE'));
    """
query = query_template
query_params = (str(culture_extractor_site_uuid),)

with psycopg.connect(connection_string) as conn:
    # The cursor context manager closes the cursor even when the query raises.
    with conn.cursor() as cursor:
        cursor.execute(query, query_params)
        results = cursor.fetchall()
        # Element 0 of each cursor.description entry is the column name
        column_names = [desc[0] for desc in cursor.description]

df_culture_extractor_scenes = pd.DataFrame(results, columns=column_names)

# Lift duration and phash out of the file metadata when it is a dict carrying them
df_culture_extractor_scenes["culture_extractor_duration"] = df_culture_extractor_scenes["downloads_file_metadata"].apply(lambda x: x["duration"] if isinstance(x, dict) and "duration" in x else None)
df_culture_extractor_scenes["culture_extractor_phash"] = df_culture_extractor_scenes["downloads_file_metadata"].apply(lambda x: x["phash"] if isinstance(x, dict) and "phash" in x else None)
df_culture_extractor_scenes

Unnamed: 0,site_uuid,site_short_name,site_name,release_uuid,release_date,release_short_name,release_name,release_url,release_json_document,downloads_uuid,downloads_downloaded_at,downloads_variant,downloads_file_type,downloads_content_type,downloads_file_metadata,culture_extractor_duration,culture_extractor_phash
0,018c0784-4977-7064-8a5c-09b0b2c73f70,slayed,Slayed,018c72c6-d7db-7426-b204-b96104245053,2023-12-12,curvy-baddies-kendra-and-gizelle-get-wet-and-s...,Curvy Baddies Kendra And Gizelle Get Wet And S...,https://members.slayed.com/videos/curvy-baddie...,{'data': {'findOneVideo': {'id': 'slayed:curvy...,018c72cb-1f62-7410-abb1-39192d5eeeb1,2023-12-16 15:22:43.682978,4K MP4 UHD,video,scene,"{'$type': 'VideoHashes', 'duration': 2222, 'ph...",2222,cf28303616d17d2f
1,018c0784-4977-7064-8a5c-09b0b2c73f70,slayed,Slayed,018c0789-a53f-747f-a273-9e94dc9f216a,2023-05-30,premium-service,Premium Service,https://members.slayed.com/videos/premium-service,{'data': {'findOneVideo': {'id': 'slayed:premi...,018c1f2f-592c-71da-bb5e-b2de8fdea98a,2023-11-30 09:44:05.932222,4K MP4 UHD,video,scene,"{'$type': 'VideoHashes', 'duration': 2287, 'ph...",2287,f117b465d23a8e32
2,018c0784-4977-7064-8a5c-09b0b2c73f70,slayed,Slayed,018c1ea6-8089-70e4-a55f-f1c98d9b4a5d,2023-11-28,insatiable-olivia-ravishes-chloes-heavenly-body,Insatiable Olivia Ravishes Chloes Heavenly Body,https://members.slayed.com/videos/insatiable-o...,{'data': {'findOneVideo': {'id': 'slayed:insat...,018c1ec1-7509-7114-a20c-540518af4710,2023-11-30 07:44:04.105657,4K MP4 UHD,video,scene,"{'$type': 'VideoHashes', 'duration': 2470, 'ph...",2470,af9a3f64b30a2c83
3,018c0784-4977-7064-8a5c-09b0b2c73f70,slayed,Slayed,018c0788-ed05-75a6-a98e-32335390cf8b,2023-11-14,dripping-wet-duo-cherry-and-skye-lick-each-oth...,Dripping Wet Duo Cherry And Skye Lick Each Oth...,https://members.slayed.com/videos/dripping-wet...,{'data': {'findOneVideo': {'id': 'slayed:dripp...,018c1ec4-a40a-7222-87f7-b3fd5870b537,2023-11-30 07:47:32.746698,4K MP4 UHD,video,scene,"{'$type': 'VideoHashes', 'duration': 2273, 'ph...",2273,829b1d944bd6c557
4,018c0784-4977-7064-8a5c-09b0b2c73f70,slayed,Slayed,018c0788-f7aa-711d-a0a5-e32119392481,2023-10-31,thirsty-jennie-makes-bestie-queenie-squirt-all...,Thirsty Jennie Makes Bestie Queenie Squirt All...,https://members.slayed.com/videos/thirsty-jenn...,{'data': {'findOneVideo': {'id': 'slayed:thirs...,018c1ec8-3069-75d8-b212-15b50ea32023,2023-11-30 07:51:25.289193,4K MP4 UHD,video,scene,"{'$type': 'VideoHashes', 'duration': 2383, 'ph...",2383,e232c13d5e03be56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,018c0784-4977-7064-8a5c-09b0b2c73f70,slayed,Slayed,018c078e-733c-77a9-9d71-295da108975f,2021-07-15,dress-up,Dress Up,https://members.slayed.com/videos/dress-up,{'data': {'findOneVideo': {'id': 'slayed:dress...,018c2597-74a1-7492-958c-b0538ceaef97,2023-12-01 15:35:32.001891,4K MP4 UHD,video,scene,"{'$type': 'VideoHashes', 'duration': 2353, 'ph...",2353,a33c0fc8ae7d890d
116,018c0784-4977-7064-8a5c-09b0b2c73f70,slayed,Slayed,018c078e-7cf4-767a-8528-70f19f1fec46,2021-07-08,sins-of-the-flesh,Sins Of The Flesh,https://members.slayed.com/videos/sins-of-the-...,{'data': {'findOneVideo': {'id': 'slayed:sins-...,018c25a2-5735-71c1-84da-ccbac5df81e8,2023-12-01 15:47:25.365885,4K MP4 UHD,video,scene,"{'$type': 'VideoHashes', 'duration': 2687, 'ph...",2687,821c17fdc38aab9a
117,018c0784-4977-7064-8a5c-09b0b2c73f70,slayed,Slayed,018c078e-8724-7783-bf97-4648d8b14d07,2021-07-01,devour,Devour,https://members.slayed.com/videos/devour,{'data': {'findOneVideo': {'id': 'slayed:devou...,018c25aa-563b-721e-af37-e53f753fbfba,2023-12-01 15:56:09.403874,4K MP4 UHD,video,scene,"{'$type': 'VideoHashes', 'duration': 1928, 'ph...",1928,a7e700295475a737
118,018c0784-4977-7064-8a5c-09b0b2c73f70,slayed,Slayed,018c078e-9120-7131-b308-e7d9e3ef96e4,2021-06-24,nightcall,Nightcall,https://members.slayed.com/videos/nightcall,{'data': {'findOneVideo': {'id': 'slayed:night...,018c25ae-6992-77ce-a41a-ed32902fa5f3,2023-12-01 16:00:36.498510,4K MP4 UHD,video,scene,"{'$type': 'VideoHashes', 'duration': 1442, 'ph...",1442,d61a381b020efb7b


In [None]:
# Rows whose release_short_name occurs more than once; keep=False flags every occurrence
is_repeated_short_name = df_culture_extractor_scenes.duplicated(subset=['release_short_name'], keep=False)
df_nonunique_release_short_name = df_culture_extractor_scenes[is_repeated_short_name]
df_nonunique_release_short_name


In [22]:
# Get scenes from Stash
# Fields requested for every scene
scene_fragment = "id title code date files { id path basename fingerprints { type value } format width height video_codec frame_rate duration } studio { id name tags { id name } } performers { id name gender tags { id name} } stash_ids { endpoint stash_id } urls"

# Restrict the search to the studio selected earlier (current_studio)
studio_scene_filter = {
    "studios": { "value": [current_studio["id"]], "excludes": [], "modifier": "INCLUDES", "depth": -1 }
}

stash_scenes = stash.find_scenes(
    studio_scene_filter,
    # filter={
    #     "per_page": 500, "page": 1, "sort": "path", "direction": "DESC"
    # },
    fragment=scene_fragment,
)
df_stash_scenes = pd.DataFrame(stash_scenes)

def get_endpoint_stash_id(stash_ids, endpoint):
    """Return the ``stash_id`` of the first entry registered for ``endpoint``.

    Args:
        stash_ids: iterable of dicts with ``endpoint`` and ``stash_id`` keys.
        endpoint: endpoint URL to look for.

    Returns:
        The matching entry's ``stash_id``, or None when no entry matches.
    """
    return next(
        (entry["stash_id"] for entry in stash_ids if entry["endpoint"] == endpoint),
        None,
    )

def get_tpdb_id(stash_ids):
    """Return the stash id registered for the theporndb.net endpoint, or None."""
    endpoint = "https://theporndb.net/graphql"
    return get_endpoint_stash_id(stash_ids, endpoint)

def get_stashdb_id(stash_ids):
    """Return the stash id registered for the stashdb.org endpoint, or None."""
    endpoint = "https://stashdb.org/graphql"
    return get_endpoint_stash_id(stash_ids, endpoint)

def get_culture_extractor_id(stash_ids):
    """Return the stash id registered for the culture.extractor endpoint, or None."""
    endpoint = "https://culture.extractor/graphql"
    return get_endpoint_stash_id(stash_ids, endpoint)

def _primary_file(files):
    """Return the first entry of a scene's ``files`` list, or None when there is none."""
    if isinstance(files, (list, tuple)) and files:
        return files[0]
    return None


def _primary_file_duration(files):
    """Return the duration of the scene's first file, or None when it has no files."""
    primary = _primary_file(files)
    return primary["duration"] if primary is not None else None


def _primary_file_fingerprint(files, fingerprint_type):
    """Return the first fingerprint value of ``fingerprint_type`` on the first file, or None."""
    primary = _primary_file(files)
    if primary is None:
        return None
    return next((fp["value"] for fp in primary["fingerprints"] if fp["type"] == fingerprint_type), None)


df_stash_scenes["date"] = pd.to_datetime(df_stash_scenes["date"])

# One id column per external endpoint
df_stash_scenes["stashdb_id"] = df_stash_scenes["stash_ids"].apply(get_stashdb_id)
df_stash_scenes["tpdb_id"] = df_stash_scenes["stash_ids"].apply(get_tpdb_id)
df_stash_scenes["culture_extractor_id"] = df_stash_scenes["stash_ids"].apply(get_culture_extractor_id)

# Duration and fingerprints come from the first file only; scenes without any
# file get None instead of raising IndexError on files[0].
df_stash_scenes["stash_duration"] = df_stash_scenes["files"].apply(_primary_file_duration)
df_stash_scenes["stash_phash"] = df_stash_scenes["files"].apply(lambda files: _primary_file_fingerprint(files, "phash"))
df_stash_scenes["stash_oshash"] = df_stash_scenes["files"].apply(lambda files: _primary_file_fingerprint(files, "oshash"))

df_stash_scenes

Unnamed: 0,id,title,code,date,files,studio,performers,stash_ids,urls,stashdb_id,tpdb_id,culture_extractor_id,stash_duration,stash_phash,stash_oshash
0,150,Me and Myself Part 2,20170526/ME_AND_MYSELF_PART_2,2017-05-26,"[{'id': '150', 'path': 'W:\Culture\Videos\Site...","{'id': '5', 'name': 'SexArt', 'tags': []}","[{'id': '139', 'name': 'Alexis Crystal', 'gend...",[{'endpoint': 'https://culture.extractor/graph...,[https://www.sexart.com/model/alexis-crystal-a...,5d2d3eef-ee07-49d3-b1b6-f25d57f81a0f,e1f62faa-a2f7-4bca-94ce-6648ebac2721,018b8e85-cedd-73c3-bd8c-5a6eefc1ff4c,1109.44,8703020f0badfefc,6dcfcfee37cd095f
1,151,Girly Things,,2021-02-03,"[{'id': '151', 'path': 'W:\Culture\Videos\Site...","{'id': '5', 'name': 'SexArt', 'tags': []}","[{'id': '139', 'name': 'Alexis Crystal', 'gend...","[{'endpoint': 'https://stashdb.org/graphql', '...",[https://www.sexart.com/model/alexis-crystal-a...,761ba31c-3d05-4b60-ba70-4db91dccca26,bd00b64a-a022-49aa-aa57-6439f1e06bef,,1200.76,a51977476870c736,e7d01d0fb8ed6df5
2,153,Close Up,20180810/CLOSE_UP,2018-08-10,"[{'id': '153', 'path': 'W:\Culture\Videos\Site...","{'id': '5', 'name': 'SexArt', 'tags': []}","[{'id': '81', 'name': 'Amaris', 'gender': 'FEM...",[{'endpoint': 'https://culture.extractor/graph...,[https://www.sexart.com/model/alexis-crystal-a...,9568cdcc-f6b5-4d54-b65c-c49653287e52,b8a160fe-5611-42d9-bc8b-49f96a636ef3,018b8e85-a215-7410-8fcb-ac14357976fe,1454.64,d8eab96d121ae992,46603ecf2c33bee5
3,156,Mirroring,,2018-03-09,"[{'id': '156', 'path': 'W:\Culture\Videos\Site...","{'id': '5', 'name': 'SexArt', 'tags': []}","[{'id': '139', 'name': 'Alexis Crystal', 'gend...","[{'endpoint': 'https://stashdb.org/graphql', '...",[https://www.sexart.com/model/linda-sweet-and-...,c86390be-5aa1-49e3-b40b-0e120b42ec9e,c1b526b8-5f44-4ac7-a8af-fe55eb25317a,,1316.84,807e471e1099fee3,ed6f341e90b75dfe
4,180,Under Cover,20160805/UNDER_COVER,2016-08-05,"[{'id': '180', 'path': 'W:\Culture\Videos\Site...","{'id': '5', 'name': 'SexArt', 'tags': []}","[{'id': '221', 'name': 'Linda Sweet', 'gender'...",[{'endpoint': 'https://culture.extractor/graph...,[https://www.sexart.com/model/antonia-sainz-an...,c5ed1ed6-70b7-4335-9bb4-783e5203ffb9,1fb94ad8-a22a-447c-85bd-d227c1b49d6c,018b8e85-dd6d-7576-9200-08261c37e3dc,1327.56,e9296c4966c98dd6,954fbd7c72fbf3ad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445,24264,Imagine,628EA954D07A4AF43968FCB881FDA282,2024-09-08,"[{'id': '428717', 'path': 'W:\Culture\Videos\S...","{'id': '5', 'name': 'SexArt', 'tags': []}","[{'id': '632', 'name': 'Ricky Rascal', 'gender...","[{'endpoint': 'https://stashdb.org/graphql', '...",[https://www.sexart.com/model/sata-jones-and-r...,50daf919-e2bb-4c18-bffb-809b7945a9e3,,,2406.81,9456406aefea39a9,71d2fe9c6a70e8d4
446,24393,Velvet Secret,CC995D4C4C84C0A49DBE077D321BCC2C,2024-06-28,"[{'id': '438741', 'path': 'W:\Culture\Videos\S...","{'id': '5', 'name': 'SexArt', 'tags': []}","[{'id': '2244', 'name': 'Kama Oxi', 'gender': ...","[{'endpoint': 'https://stashdb.org/graphql', '...",[https://www.sexart.com/model/kama-oxi-and-ley...,1b061924-f40f-466e-9d5f-612cf7f71b3c,,,1717.21,89295fd5c1172eaa,bac4b9b0332c9b6c
447,24394,Lustful Enchantment,F6B2BB97DF3C92D42530051C72BD4490,2024-03-29,"[{'id': '438742', 'path': 'W:\Culture\Videos\S...","{'id': '5', 'name': 'SexArt', 'tags': []}","[{'id': '2850', 'name': 'Leya Desantis', 'gend...","[{'endpoint': 'https://stashdb.org/graphql', '...",[https://www.sexart.com/model/alice-zaffyre-an...,512aff54-8e82-43c4-a67a-ed1887f64259,,,1797.50,82d05587d7f26af0,3e6bbb1ddc11a2c7
448,24401,Vox Episode 2,,2015-12-09,"[{'id': '4700', 'path': 'X:\Culture\Videos\Vir...","{'id': '5', 'name': 'SexArt', 'tags': []}","[{'id': '241', 'name': 'Amarna Miller', 'gende...","[{'endpoint': 'https://stashdb.org/graphql', '...",[https://www.sexart.com/model/amarna-miller/mo...,6d7c7c86-03d1-4dae-9201-fc50aed5c9c5,,,896.19,8778f80378f883f8,bd7734b39f1fb93c


In [None]:
# Check for duplicate scenes based on StashDB ID
has_stashdb_id = df_stash_scenes['stashdb_id'].notna()
# keep=False marks every row of a repeated id, not just the later ones
shares_stashdb_id = df_stash_scenes.duplicated(subset=['stashdb_id'], keep=False)

df_duplicate_stash_scenes = df_stash_scenes[has_stashdb_id & shares_stashdb_id]
df_duplicate_stash_scenes


In [56]:
# Add the duplicate tag to the scenes
duplicate_tag_name = "StashDB: Duplicate Scenes Based On ID"
duplicate_stashdb_ids_tag = stash.find_tag(duplicate_tag_name)
# Fail with a clear message instead of a TypeError on None["id"] further down
assert duplicate_stashdb_ids_tag is not None, f"No tag found with name: {duplicate_tag_name}"
duplicate_tag_id = duplicate_stashdb_ids_tag["id"]

for index, row in df_duplicate_stash_scenes.iterrows():
    # Re-read the scene so the tag list being extended is the current one
    refreshed_scene = stash.find_scene(row["id"])
    existing_tag_ids = [tag["id"] for tag in refreshed_scene["tags"]]
    if duplicate_tag_id not in existing_tag_ids:
        updated_tag_ids = existing_tag_ids + [duplicate_tag_id]
        stash.update_scene({ "id": row["id"], "tag_ids": updated_tag_ids })


# Matching existing scenes

In [135]:
# Match Stash and Culture Extractor scenes based on phash
df_culture_extractor_scenes['release_date'] = pd.to_datetime(df_culture_extractor_scenes['release_date'])

# pandas.merge treats null keys as equal to each other, so a Stash scene without
# a phash would be paired with every Culture Extractor download without one
# (and the Hamming-distance step below would then fail on None). Only downloads
# that actually have a phash take part in the join.
df_culture_extractor_scenes_with_phash = df_culture_extractor_scenes.dropna(subset=["culture_extractor_phash"])

# Merge the dataframes; how='left' keeps every Stash scene, matched or not
df_merged_scenes = pd.merge(df_stash_scenes, df_culture_extractor_scenes_with_phash,
                            left_on='stash_phash', right_on='culture_extractor_phash',
                            how='left')

# Matched rows only. Copy after filtering so the column assignments below write
# to an independent frame rather than to a slice of df_merged_scenes.
df_merged_matched_scenes = df_merged_scenes[df_merged_scenes["release_uuid"].notnull()].copy()

# Comparison metrics between the Stash side and the Culture Extractor side of each match
df_merged_matched_scenes["duration_difference"] = df_merged_matched_scenes["stash_duration"] - df_merged_matched_scenes["culture_extractor_duration"]
df_merged_matched_scenes["phash_distance"] = df_merged_matched_scenes.apply(lambda row: calculate_hamming_distance(row["stash_phash"], row["culture_extractor_phash"]), axis=1)
df_merged_matched_scenes["title_levenshtein"] = df_merged_matched_scenes.apply(lambda row: levenshtein(row["title"], row["release_name"]), axis=1)

In [None]:
# Update Stash scenes with Culture Extractor ID
for index, row in df_merged_matched_scenes.iterrows():
    # Check for a match before calling Stash, so unmatched rows cost no request
    release_uuid = row["release_uuid"]
    if pd.isnull(release_uuid):
        continue

    # `scene_id` rather than `id`: a top-level `id` would shadow the builtin for later cells
    scene_id = row["id"]
    refreshed_scene = stash.find_scene(scene_id)

    existing_stash_ids = refreshed_scene["stash_ids"]
    if any(stash_id_obj["endpoint"] == "https://culture.extractor/graphql" for stash_id_obj in existing_stash_ids):
        # Already linked to a Culture Extractor release; leave the scene untouched
        continue

    existing_stash_ids.append({ "endpoint": "https://culture.extractor/graphql", "stash_id": str(release_uuid) })
    code = row["release_short_name"]
    stash.update_scene({ "id": scene_id, "code": code, "stash_ids": existing_stash_ids })
    print(f"Updated scene {scene_id} with Culture Extractor ID {release_uuid}")


In [None]:
# Create a new column 'new_urls' where 'release_url' is appended to 'urls' if not already present
def _urls_with_release_url(row):
    """Return the row's urls, plus its release_url when there is one and it is not listed yet."""
    urls = row['urls']
    release_url = row['release_url']
    # df_merged_scenes comes from a left merge, so unmatched scenes have a null
    # release_url; appending it would put NaN into the url list.
    if pd.isna(release_url) or release_url in urls:
        return urls
    return urls + [release_url]


df_merged_scenes['new_urls'] = df_merged_scenes.apply(_urls_with_release_url, axis=1)

df_merged_scenes[["id", "title", "new_urls"]]

In [None]:
# Create a new column 'new_stash_ids' where a Culture Extractor entry built from
# 'release_uuid' is appended to 'stash_ids' if the scene has no such entry yet
def _stash_ids_with_culture_extractor(row):
    """Return the row's stash_ids, plus a Culture Extractor entry when one is missing and known."""
    stash_ids = row['stash_ids']
    release_uuid = row['release_uuid']
    # Unmatched rows of the left merge have a null release_uuid; str() of it
    # would register the literal "nan" as a stash id.
    if pd.isna(release_uuid):
        return stash_ids
    if any(stash_id_obj["endpoint"] == "https://culture.extractor/graphql" for stash_id_obj in stash_ids):
        return stash_ids
    return stash_ids + [{ "endpoint": "https://culture.extractor/graphql", "stash_id": str(release_uuid) }]


df_merged_scenes['new_stash_ids'] = df_merged_scenes.apply(_stash_ids_with_culture_extractor, axis=1)
df_merged_scenes[["new_stash_ids"]].values


In [None]:
# Push the extended url lists to Stash for every scene whose set of urls changed
for index, row in df_merged_scenes.iterrows():
    # `scene_id` rather than `id`: a top-level `id` would shadow the builtin for later cells
    scene_id = row["id"]
    old_urls = row["urls"]
    # Drop null entries so a missing release_url can never be sent to Stash as a url
    new_urls = [url for url in row["new_urls"] if pd.notna(url)]
    old_urls_set = set(old_urls)
    new_urls_set = set(new_urls)
    if old_urls_set != new_urls_set:
        print(row["title"])
        print(old_urls_set)
        print(new_urls_set)
        stash.update_scene({ "id": scene_id, "urls": new_urls })

    # stash_ids = row["stash_ids"]
    # new_stash_ids = row["new_stash_ids"]
    # stash_ids_set = set(stash_id_obj["stash_id"] for stash_id_obj in row["stash_ids"])
    # new_stash_ids_set = set(stash_id_obj["stash_id"] for stash_id_obj in row["new_stash_ids"])
    # if stash_ids_set != new_stash_ids_set:
    #     print(row["title"])
    #     print(stash_ids_set)
    #     print(new_stash_ids_set)
    #     stash.update_scene({ "id": scene_id, "stash_ids": new_stash_ids })

# Import new scenes

In [23]:
import os
import pandas as pd
from pathlib import Path
import re

# Define the root directory
root_dir = Path(r"F:\Ripping\Tickling Submission\Metadata")

# Extensions treated as video files
video_suffixes = ['.wmv', '.mp4']

# One record per release directory
data = []

# Each sub-directory of root_dir is one release, named after its release uuid
for release_dir in root_dir.iterdir():
    if not release_dir.is_dir():
        continue

    release_uuid = release_dir.name
    record = {
        'release_uuid': release_uuid,
        'preview_image': None,
        'full_scene': None,
        'trailer': None
    }

    # Classify each file; if several files fit a slot, the one seen last wins
    for file in release_dir.iterdir():
        suffix = file.suffix.lower()
        if suffix == '.jpg':
            record['preview_image'] = file.name
        elif suffix in video_suffixes:
            try:
                file_stem = file.stem  # filename without extension
                if file_stem.endswith(release_uuid):
                    # Video whose stem ends with the release uuid -> trailer
                    record['trailer'] = file.name
                elif re.search(r'- \d+x\d+$', file_stem):
                    # Video whose stem ends with "- <width>x<height>" -> full scene
                    record['full_scene'] = file.name
            except OSError as e:
                print(f"Error accessing file: {file}. Error: {e}")
                continue

    data.append(record)

# Create a DataFrame
df_files = pd.DataFrame(data)


In [24]:
# df_files.release_uuid holds directory names (plain strings).
# NOTE(review): the database-side release_uuid presumably arrives as uuid.UUID
# objects (other cells wrap it in str() before use) — confirm. A str key never
# equals a UUID key, so the database key is normalised to str before joining;
# for values that are already strings this is a no-op.
df_culture_extractor_scenes_str_uuid = df_culture_extractor_scenes.assign(
    release_uuid=df_culture_extractor_scenes['release_uuid'].astype(str)
)

# Left join: keep every release directory found on disk, matched or not
df_merged = pd.merge(df_files, df_culture_extractor_scenes_str_uuid,
                     left_on='release_uuid',
                     right_on='release_uuid',
                     how='left')


In [None]:
# Get release_uuid values where full_scene is missing
lacks_full_scene = df_merged['full_scene'].isnull()
missing_full_scene = df_merged.loc[lacks_full_scene, 'release_uuid'].tolist()

# Format the list for VS Code breakpoint condition
breakpoint_condition = "release_id in {}".format(missing_full_scene)

print("VS Code breakpoint condition:")
print(breakpoint_condition)


In [None]:
# Number of release_uuid values for which no full-scene file was found
len(missing_full_scene)

# Trailers

In [55]:
studio_name = "Vixen"

studio_for_trailers = stash.find_studio(studio_name)
# Fail with a clear message instead of a TypeError on None["id"] below
assert studio_for_trailers is not None, f"No studio found with name: {studio_name}"

# All scenes of that studio, with the fields needed for trailer handling
scenes_for_trailers = stash.find_scenes(
    {
        "studios": {
            "value": [studio_for_trailers["id"]],
            "excludes": [],
            "modifier": "INCLUDES",
            "depth": -1
        }
    },
    fragment="id title code date files { id path basename fingerprints { type value } format width height video_codec frame_rate duration } studio { id name tags { id name } } performers { id name gender tags { id name} } stash_ids { endpoint stash_id } urls"
)

df_stash_scenes_for_trailers = pd.DataFrame(scenes_for_trailers)

# oshash of the first file; scenes without any file get None instead of raising IndexError
df_stash_scenes_for_trailers["stash_oshash"] = df_stash_scenes_for_trailers["files"].apply(
    lambda files: next((fp["value"] for fp in files[0]["fingerprints"] if fp["type"] == "oshash"), None) if files else None
)
# Culture Extractor release uuid stored on the scene, or None
df_stash_scenes_for_trailers["culture_extractor_uuid"] = df_stash_scenes_for_trailers["stash_ids"].apply(get_culture_extractor_id)

dmatched "Vixen" to "Vixen" (77) using primary name


In [40]:
# Tag added to scenes that have a trailer file; create=True makes find_tag create
# the tag when it does not exist yet (see the "creating" message in the output).
trailer_tag = stash.find_tag("Trailer Associated", create=True)


iCould not find tag with name='Trailer Associated' creating


In [56]:
import shutil

# Source paths
culture_extractor_trailer_base_path_d = f"D:\\Ripping\\{studio_name}\\Metadata\\"
culture_extractor_trailer_base_path_f = f"F:\\Ripping\\{studio_name}\\Metadata\\"

# Target path
stash_trailer_base_path = "H:\\Stash\\generated\\trailers"

# Trailer filenames to look for, in order of preference
trailer_filename_candidates = ["trailer_2160.mp4", "trailer_1080.mp4"]


def _ensure_trailer_tag(scene_id):
    """Add the trailer tag to the scene unless it already carries it."""
    # Re-read the scene so the tag list being extended is the current one
    refreshed_scene = stash.find_scene(scene_id)
    existing_tag_ids = [tag["id"] for tag in refreshed_scene["tags"]]
    if trailer_tag["id"] not in existing_tag_ids:
        updated_tag_ids = existing_tag_ids + [trailer_tag["id"]]
        stash.update_scene({ "id": scene_id, "tag_ids": updated_tag_ids })


for index, row in df_stash_scenes_for_trailers.iterrows():
    scene_id = row["id"]
    scene_oshash = row["stash_oshash"]
    culture_extractor_uuid = row["culture_extractor_uuid"]

    # The target filename is derived from the oshash. Without one, every such
    # scene would map to the same "None.mp4" and later ones would be tagged as
    # having a trailer they do not have.
    if pd.isna(scene_oshash):
        print(f"Skipping scene {scene_id}: no oshash, cannot derive trailer filename")
        continue

    stash_trailer_path = os.path.join(stash_trailer_base_path, f"{scene_oshash}.mp4")
    if os.path.exists(stash_trailer_path):
        _ensure_trailer_tag(scene_id)
        print(f"Stash: Trailer already exists for scene {scene_id} at {stash_trailer_path}")
        continue

    # Without a Culture Extractor uuid there is no source directory to search
    if pd.isna(culture_extractor_uuid):
        print(f"No trailer found for scene {scene_id}")
        continue

    # Search the D: location first, then F:, each in filename preference order
    trailer_candidate_paths = [
        os.path.join(base_path, f"{culture_extractor_uuid}", filename)
        for base_path in (culture_extractor_trailer_base_path_d, culture_extractor_trailer_base_path_f)
        for filename in trailer_filename_candidates
    ]
    matching_trailer_path = next((path for path in trailer_candidate_paths if os.path.exists(path)), None)
    if matching_trailer_path:
        shutil.copy(matching_trailer_path, stash_trailer_path)
        print(f"Copied trailer for scene {scene_id} to {stash_trailer_path}")
        _ensure_trailer_tag(scene_id)
    else:
        print(f"No trailer found for scene {scene_id}")


Copied trailer for scene 2619 to H:\Stash\generated\trailers\5114cd36f7fea41e.mp4
Copied trailer for scene 2631 to H:\Stash\generated\trailers\c48d2969e370e1e1.mp4
Copied trailer for scene 2635 to H:\Stash\generated\trailers\de800dbeca8650ae.mp4
Copied trailer for scene 2638 to H:\Stash\generated\trailers\6ec17baad270543d.mp4
Copied trailer for scene 2646 to H:\Stash\generated\trailers\db28e41bc53efdd1.mp4
Copied trailer for scene 2647 to H:\Stash\generated\trailers\f3bb5e7e620578fd.mp4
Copied trailer for scene 2654 to H:\Stash\generated\trailers\bbff5f7f25b4652b.mp4
Copied trailer for scene 2657 to H:\Stash\generated\trailers\ff83cbea5c9327cd.mp4
Copied trailer for scene 2658 to H:\Stash\generated\trailers\a79eed8394883dfe.mp4
Copied trailer for scene 2661 to H:\Stash\generated\trailers\9288a838c834d9bc.mp4
Copied trailer for scene 2662 to H:\Stash\generated\trailers\a1fc6e030c9ab070.mp4
Copied trailer for scene 2663 to H:\Stash\generated\trailers\0b0f4d0b4ef21bca.mp4
Copied trailer f