In [None]:
%pip install psycopg2-binary
%pip install sqlalchemy

In [1]:
import psycopg
import pandas as pd

from dotenv import load_dotenv
import os

# Load the .env file
load_dotenv()

user = os.environ.get("CE_DB_USERNAME")
pw = os.environ.get("CE_DB_PASSWORD")
host = os.environ.get("CE_DB_HOST")
port = os.environ.get("CE_DB_PORT")
db = os.environ.get("CE_DB_NAME")

connection_string = f"dbname={db} user={user} password={pw} host={host} port={port}"

def hex_to_binary(hex_string):
    return bin(int(hex_string, 16))[2:].zfill(64)

def calculate_hamming_distance(phash1, phash2):
    # Convert hexadecimal phashes to binary
    binary1 = hex_to_binary(phash1)
    binary2 = hex_to_binary(phash2)
    
    # Ensure both binary strings are of equal length
    if len(binary1) != len(binary2):
        raise ValueError("Binary strings must be of equal length")
    
    # Calculate Hamming distance
    return sum(c1 != c2 for c1, c2 in zip(binary1, binary2))

# Example usage:
# phash1 = "951428607cf7cb8f"
# phash2 = "951428607cf7cb8e"
# distance = calculate_hamming_distance(phash1, phash2)
# print(f"Hamming distance between {phash1} and {phash2}: {distance}")

def levenshtein(s1, s2):
    from Levenshtein import distance
    return distance(s1, s2)


In [2]:
with psycopg.connect(connection_string) as conn:
    # Create a cursor object
    cursor = conn.cursor()

    # Execute the query
    query = "SELECT * FROM sites ORDER BY name"
    cursor.execute(query)

    # Fetch all the results
    results = cursor.fetchall()

    # Get the column names from cursor.description
    column_names = [desc[0] for desc in cursor.description]

    # Close the cursor
    cursor.close()

    # No need to manually close the connection when using 'with'

    # Convert the results to a data frame with column names
    df_sites = pd.DataFrame(results, columns=column_names)


In [23]:
with psycopg.connect(connection_string) as conn:
    # Create a cursor object
    cursor = conn.cursor()

    # Execute the query with COALESCE to handle -infinity dates
    query = """
        SELECT sites.name AS site_name,
               releases.uuid AS release_uuid,
               COALESCE(NULLIF(releases.release_date, '-infinity'), '1970-01-01'::date) as release_date,
               releases.short_name AS release_short_name, 
               releases.name AS release_name, 
               releases.url AS release_url,
               releases.json_document AS release_json_document
        FROM releases
        JOIN sites ON releases.site_uuid = sites.uuid
        WHERE sites.name = 'Tickling Submission'
    """
    cursor.execute(query)
    
    # Fetch all the results
    results = cursor.fetchall()

    # Get the column names from cursor.description
    column_names = [desc[0] for desc in cursor.description]

    # Close the cursor
    cursor.close()

    # No need to manually close the connection when using 'with'

    # Convert the results to a data frame with column names
    df_tickling_submission_releases = pd.DataFrame(results, columns=column_names)
    df_tickling_submission_releases = df_tickling_submission_releases.copy().add_prefix('culture_extractor_')
    df_tickling_submission_releases["culture_extractor_release_uuid"] = df_tickling_submission_releases["culture_extractor_release_uuid"].astype(str)


In [25]:
# List files from tickling submission releases
tickling_submission_path = "Z:\\Culture\\Videos\\Sites\\Tickling Submission\\"

# Read all files from the path
tickling_submission_files = [f for f in os.listdir(tickling_submission_path) if not f.endswith('.json')]
tickling_submission_files

df_tickling_submissions_files = pd.DataFrame(tickling_submission_files, columns=["file_name"])
# Extract id from file names like "Tickling Submission - 2007-10-15 - Susan Bedspread & Tickle Torture  - 0192228c-f4ae-7532-bc30-6b1afc518c65 - 852x480.wmv"
df_tickling_submissions_files["id"] = df_tickling_submissions_files["file_name"].str.extract(r' - (\w{8}-\w{4}-\w{4}-\w{4}-\w{12}) -')


In [26]:
df_tickling_submissions_joined = pd.merge(df_tickling_submissions_files, df_tickling_submission_releases, left_on="id", right_on="culture_extractor_release_uuid", how="inner")
df_tickling_submissions_joined

Unnamed: 0,file_name,id,culture_extractor_site_name,culture_extractor_release_uuid,culture_extractor_release_date,culture_extractor_release_short_name,culture_extractor_release_name,culture_extractor_release_url,culture_extractor_release_json_document
0,Tickling Submission - 2007-10-15 - Susan Bedsp...,0192228c-f4ae-7532-bc30-6b1afc518c65,Tickling Submission,0192228c-f4ae-7532-bc30-6b1afc518c65,2007-10-15,susan-bedspread-tickle-torture,Susan Bedspread & Tickle Torture,http://www.tickling-submission.com/movie/tickl...,"{'duration': '6:35', 'html': '<!DOCTYPE html P..."
1,Tickling Submission - 2007-10-22 - Susan on Si...,0192228d-9d17-70e6-891f-b7dea5b74014,Tickling Submission,0192228d-9d17-70e6-891f-b7dea5b74014,2007-10-22,susan-silver-master,Susan on Silver master,http://www.tickling-submission.com/movie/tickl...,"{'duration': '7:37', 'html': '<!DOCTYPE html P..."
2,Tickling Submission - 2007-10-23 - Sandra Wors...,01922210-4818-707c-a929-2fdc5d4905af,Tickling Submission,01922210-4818-707c-a929-2fdc5d4905af,2007-10-23,sandra-worshipped-susans-feet-susan-loves-sand...,Sandra Worshipped Susan's Feet - Susan Loves S...,http://www.tickling-submission.com/movie/foot-...,"{'duration': '12:01', 'html': '<!DOCTYPE html ..."
3,Tickling Submission - 2008-06-12 - Tickling Pa...,0192228e-4588-7608-b7c5-2041070bd0d2,Tickling Submission,0192228e-4588-7608-b7c5-2041070bd0d2,2008-06-12,tickling-party,Tickling Party,http://www.tickling-submission.com/movie/tickl...,"{'duration': '5:38', 'html': '<!DOCTYPE html P..."
4,Tickling Submission - 2008-06-28 - Anita Force...,0192220f-ceea-76ee-911a-bd3786dc92e0,Tickling Submission,0192220f-ceea-76ee-911a-bd3786dc92e0,2008-06-28,anita-forced-orgasm,Anita Forced Orgasm,http://www.tickling-submission.com/movie/force...,"{'duration': '6:49', 'html': '<!DOCTYPE html P..."
...,...,...,...,...,...,...,...,...,...
298,Tickling Submission - 2017-11-05 - Ticklish Em...,0192232d-0827-71ea-ae1a-7c00189cb05b,Tickling Submission,0192232d-0827-71ea-ae1a-7c00189cb05b,2017-11-05,ticklish-emylia-devil-nylon,Ticklish Emylia On The Devil In Nylon,http://www.tickling-submission.com/movie/tickl...,"{'duration': '9:00', 'html': '<!DOCTYPE html P..."
299,Tickling Submission - 2017-11-10 - Naked & Tic...,0192232d-7fe8-761e-9747-7c696f79255a,Tickling Submission,0192232d-7fe8-761e-9747-7c696f79255a,2017-11-10,naked-tickled-katy-pearl,Naked & Tickled Katy Pearl,http://www.tickling-submission.com/movie/tickl...,"{'duration': '13:00', 'html': '<!DOCTYPE html ..."
300,Tickling Submission - 2018-01-24 - Nylon Tickl...,01922334-d7a3-704f-8960-401e7a82cfb6,Tickling Submission,01922334-d7a3-704f-8960-401e7a82cfb6,2018-01-24,nylon-tickling-pornstar-lovita-fate,Nylon Tickling Pornstar Lovita Fate,http://www.tickling-submission.com/movie/tickl...,"{'duration': '11:00', 'html': '<!DOCTYPE html ..."
301,Tickling Submission - 2018-02-20 - Tickle Tort...,01922339-20a5-71cc-8e8d-9f5f0da5620e,Tickling Submission,01922339-20a5-71cc-8e8d-9f5f0da5620e,2018-02-20,tickle-tortura-lovita,Tickle Tortura Lovita,http://www.tickling-submission.com/movie/tickl...,"{'duration': '8:00', 'html': '<!DOCTYPE html P..."


In [47]:
tickling_submission_stash_scenes = stash.find_scenes({}, q="Tickling Submission")


In [48]:
tickling_submission_stash_scenes


[{'id': '22538',
  'title': 'Mummified hard foot tickled',
  'code': '',
  'details': 'Alexis is bound on the tickle table with her body wrapped, she is completely immobile and totally vunerable for tickling. With a pair of delicious, ticklish bare feet bound in front of her, Rene wastes no time and soon sets her nimble fingers to work! She tickles her bare feet, she shows no mercy and is relentless! Extra devious thing… its hairbrush time! Don’t miss this brand new tickling submission clip, with the hot Alexis and her sexy feet tickled crazy!',
  'director': '',
  'urls': ['http://www.tickling-submission.com/movie/tickled-feet/mummified-hard-foot-tickled/'],
  'date': '2015-12-11',
  'rating100': None,
  'organized': False,
  'o_counter': 0,
  'interactive': False,
  'interactive_speed': None,
  'captions': None,
  'created_at': '2024-07-02T10:46:15+03:00',
  'updated_at': '2024-12-20T05:47:52+02:00',
  'last_played_at': None,
  'resume_time': 0,
  'play_duration': 0,
  'play_count': 

In [50]:
import re

# Extract UUID from stash scene filenames
stash_scene_uuids = []
for scene in tickling_submission_stash_scenes:
    if scene.get('files') and len(scene['files']) > 0:
        # Extract UUID pattern from basename using regex
        basename = scene['files'][0].get('basename', '')
        uuid_match = re.search(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', basename)
        if uuid_match:
            stash_scene_uuids.append({
                'stash_scene': scene,
                'uuid': uuid_match.group(1)
            })

# Create mapping dictionary from UUID to stash scene
stash_scene_map = {x['uuid']: x['stash_scene'] for x in stash_scene_uuids}

# Add stash scene data to dataframe by matching UUIDs
df_tickling_submissions_joined['stash_scene'] = df_tickling_submissions_joined['culture_extractor_release_uuid'].map(stash_scene_map)


In [51]:
# Extract basename from stash scene files
df_tickling_submissions_joined['basename'] = df_tickling_submissions_joined['stash_scene'].apply(
    lambda x: x['files'][0]['basename'] if x and 'files' in x and len(x['files']) > 0 else None
)


In [32]:
from bs4 import BeautifulSoup

def extract_performers(json_doc):
    if not isinstance(json_doc, dict) or 'html' not in json_doc:
        return []
    
    html_document = json_doc['html']
    soup = BeautifulSoup(html_document, 'html.parser')
    
    performers_element = soup.find('div', class_='field-field-tag-performers')
    if not performers_element:
        return []
        
    performers = performers_element.find_all('a')
    performers_data = []
    for performer in performers:
        performer_id = performer['href'].split('/')[-1]
        performer_name = performer.text.strip()
        performers_data.append({'id': performer_id, 'name': performer_name})
    
    return performers_data

# Apply to whole dataframe
df_tickling_submissions_joined['performers'] = df_tickling_submissions_joined['culture_extractor_release_json_document'].apply(extract_performers)

# Create a string column with performer names
df_tickling_submissions_joined['performers_string'] = df_tickling_submissions_joined['performers'].apply(
    lambda x: ', '.join(p['name'] for p in x)
)


In [65]:
# Extract scene description from HTML. It is in div.product-body with p elements
from bs4 import BeautifulSoup

def extract_description(json_doc):
    if not isinstance(json_doc, dict) or 'html' not in json_doc:
        return []
    
    html_document = json_doc['html']
    soup = BeautifulSoup(html_document, 'html.parser')
    description = soup.find('div', class_='product-body')
    
    if not description:
        return None
        
    # Get all paragraphs and join with newlines
    paragraphs = description.find_all('p')
    if len(paragraphs) > 1:
        print(len(paragraphs))
        print(paragraphs)
    if paragraphs:
        return '\n'.join(p.text.strip() for p in paragraphs)
    else:
        return description.text.strip()

# Apply to whole dataframe
df_tickling_submissions_joined['description'] = df_tickling_submissions_joined['culture_extractor_release_json_document'].apply(extract_description)


2
[<p>Melisa is tightly bounded on the rack. She is nice petite girl with hungry pussy. Sabrina give her forced orgasm, but tickling is foreplay. Then Sabrina holds dildo on her pussy and at the same time she softly tickle her feet. Melisa is sweet giggler and enjoy every touch with dildo. She will feel the vibrations of our dildo through her soft panties. She moan in pure pleasure and she forget on tickling! Don't miss it!</p>, <p><a href="http://www.clips4sale.com/work/store/index.php?storeid=2676&amp;buy=8305069&amp;checkout=2" rel="nofollow"></a></p>]


In [52]:
stash_performers_by_name_and_id = {}
for _, release in df_tickling_submissions_joined.iterrows():
    for performer in release["performers"]:
        stash_performer = stash.find_performer({ "name": "Tickling Submission " + performer["name"] }, create=True)
        stash_performers_by_name_and_id[stash_performer["name"]] = stash_performer



dmatched performer "Tickling Submission Susan" to "Tickling Submission Susan" (4021) using primary name
dmatched performer "Tickling Submission Tickler Sandra" to "Tickling Submission Tickler Sandra" (4022) using primary name
dmatched performer "Tickling Submission Tickler" to "Tickling Submission Tickler" (4023) using primary name
dmatched performer "Tickling Submission Susan" to "Tickling Submission Susan" (4021) using primary name
dmatched performer "Tickling Submission Tickler" to "Tickling Submission Tickler" (4023) using primary name
dmatched performer "Tickling Submission Susan" to "Tickling Submission Susan" (4021) using primary name
dmatched performer "Tickling Submission Sandra" to "Tickling Submission Sandra" (4024) using primary name
dmatched performer "Tickling Submission Anita" to "Tickling Submission Anita" (4025) using primary name
dmatched performer "Tickling Submission Anita" to "Tickling Submission Anita" (4025) using primary name
dmatched perform

In [54]:
# Create column with Stash performer IDs for each release
df_tickling_submissions_joined['stash_performer_ids'] = df_tickling_submissions_joined['performers'].apply(
    lambda performers: [
        stash_performers_by_name_and_id["Tickling Submission " + p["name"]]["id"] 
        for p in performers
    ]
)


In [72]:
import base64

for _, release in df_tickling_submissions_joined.iterrows():
    scene_data = {
        "id": release["stash_scene"]["id"],
        "code": release["culture_extractor_release_short_name"],
        "title": release["culture_extractor_release_name"],
        "date": release["culture_extractor_release_date"].strftime("%Y-%m-%d"),
        "details": release["description"],
        "performer_ids": release["stash_performer_ids"],
        "urls": [release["culture_extractor_release_url"]],
        "stash_ids": [{"endpoint": "https://culture.extractor/graphql", "stash_id": release["culture_extractor_release_uuid"]}]
    }
    
    # Find jpg file in a directory like F:\Ripping\Tickling Submission\Metadata\0192221b-5f5b-75ac-a715-e8292b4262e7\
    metadata_path = f"F:\\Ripping\\Tickling Submission\\Metadata\\{release['culture_extractor_release_uuid']}\\"
    jpg_file = next((f for f in os.listdir(metadata_path) if f.endswith('.jpg')), None)
    if jpg_file:
        with open(f"F:\\Ripping\\Tickling Submission\\Metadata\\{release['culture_extractor_release_uuid']}\\" + jpg_file, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
            scene_data["cover_image"] = f"data:image/jpeg;base64,{encoded_image}"
    
    stash.update_scene(scene_data)


In [4]:
with psycopg.connect(connection_string) as conn:
    # Create a cursor object
    cursor = conn.cursor()

    # Execute the query with COALESCE to handle -infinity dates
    query = """
        SELECT sites.name AS site_name,
               releases.uuid AS release_uuid,
               COALESCE(NULLIF(releases.release_date, '-infinity'), '1970-01-01'::date) as release_date,
               releases.short_name AS release_short_name, 
               releases.name AS release_name, 
               releases.url AS release_url,
               releases.json_document AS release_json_document,
               downloads.uuid AS downloads_uuid,
               downloads.file_type,
               downloads.content_type,
               downloads.saved_filename,
               downloads.file_metadata
        FROM releases
        JOIN sites ON releases.site_uuid = sites.uuid
        JOIN downloads ON releases.uuid = downloads.release_uuid
    """
    cursor.execute(query)

    # Fetch all the results
    results = cursor.fetchall()

    # Get the column names from cursor.description
    column_names = [desc[0] for desc in cursor.description]

    # Close the cursor
    cursor.close()

    # No need to manually close the connection when using 'with'

    # Convert the results to a data frame with column names
    df_releases = pd.DataFrame(results, columns=column_names)
    # df_releases['sha256'] = df_releases['file_metadata'].apply(lambda x: x['sha256Sum'] if isinstance(x, dict) and 'sha256Sum' in x else None)
    df_releases['oshash'] = df_releases['file_metadata'].apply(lambda x: x['oshash'] if isinstance(x, dict) and 'oshash' in x else None)
    
    df_releases = df_releases.copy().add_prefix('culture_extractor_')


KeyboardInterrupt: 

In [11]:
# Get database schema information
with psycopg.connect(connection_string) as conn:
    # Create a cursor object
    cursor = conn.cursor()

    # Query to get table information
    table_query = """
        SELECT DISTINCT table_name 
        FROM information_schema.tables 
        WHERE table_schema = 'public' 
        AND table_type = 'BASE TABLE'
        ORDER BY table_name;
    """
    cursor.execute(table_query)
    tables = cursor.fetchall()

    print("Database Schema:")
    for (table_name,) in tables:
        print(f"\nTable: {table_name}")
        
        # Get column information for each table
        column_query = """
            SELECT 
                column_name,
                data_type,
                column_default,
                is_nullable,
                character_maximum_length
            FROM information_schema.columns
            WHERE table_schema = 'public'
            AND table_name = %s
            ORDER BY ordinal_position;
        """
        cursor.execute(column_query, (table_name,))
        columns = cursor.fetchall()
        
        for column in columns:
            column_name, data_type, default, nullable, max_length = column
            nullable_str = "NULL" if nullable == 'YES' else "NOT NULL"
            default_str = f"DEFAULT {default}" if default else ""
            length_str = f"({max_length})" if max_length else ""
            print(f"  {column_name}: {data_type}{length_str} {nullable_str} {default_str}")

    cursor.close()

Database Schema:

Table: __EFMigrationsHistory
  migration_id: character varying(150) NOT NULL 
  product_version: character varying(32) NOT NULL 

Table: downloads
  uuid: uuid NOT NULL 
  downloaded_at: timestamp without time zone NOT NULL 
  file_type: text NOT NULL 
  content_type: text NOT NULL 
  variant: text NOT NULL 
  available_file: json NOT NULL 
  original_filename: text NULL 
  saved_filename: text NULL 
  release_uuid: uuid NOT NULL 
  file_metadata: json NOT NULL DEFAULT '{}'::json

Table: performers
  uuid: uuid NOT NULL 
  short_name: text NULL 
  name: text NOT NULL 
  url: text NULL 
  site_uuid: uuid NOT NULL 

Table: release_entity_site_performer_entity
  performers_uuid: uuid NOT NULL 
  releases_uuid: uuid NOT NULL 

Table: release_entity_site_tag_entity
  releases_uuid: uuid NOT NULL 
  tags_uuid: uuid NOT NULL 

Table: releases
  uuid: uuid NOT NULL 
  release_date: date NOT NULL 
  short_name: text NOT NULL 
  name: text NOT NULL 
  url: text NOT NULL 
  desc

In [44]:
with psycopg.connect(connection_string) as conn:
    # Create a cursor object
    cursor = conn.cursor()

    # Execute the query with COALESCE to handle -infinity dates
    query = """
        SELECT sites.name AS site_name,
               releases.uuid AS release_uuid,
               COALESCE(NULLIF(releases.release_date, '-infinity'), '1970-01-01'::date) as release_date,
               releases.short_name AS release_short_name, 
               releases.name AS release_name, 
               releases.url AS release_url,
               releases.json_document AS release_json_document,
               downloads.uuid AS downloads_uuid,
               downloads.file_type,
               downloads.content_type,
               downloads.saved_filename,
               downloads.file_metadata
        FROM releases
        JOIN sites ON releases.site_uuid = sites.uuid
        JOIN downloads ON releases.uuid = downloads.release_uuid
        WHERE file_type = 'zip' AND content_type = 'gallery' AND sites.name = 'Femjoy'
    """
    cursor.execute(query)

    # Fetch all the results
    results = cursor.fetchall()

    # Get the column names from cursor.description
    column_names = [desc[0] for desc in cursor.description]

    # Close the cursor
    cursor.close()

    # No need to manually close the connection when using 'with'

    # Convert the results to a data frame with column names
    df_releases = pd.DataFrame(results, columns=column_names)
    # df_releases['sha256'] = df_releases['file_metadata'].apply(lambda x: x['sha256Sum'] if isinstance(x, dict) and 'sha256Sum' in x else None)
    # df_releases['oshash'] = df_releases['file_metadata'].apply(lambda x: x['oshash'] if isinstance(x, dict) and 'oshash' in x else None)
    
    
    df_releases = df_releases.copy().add_prefix('culture_extractor_')

# Extract model names from json_document
def extract_model_names(json_doc):
    try:
        if isinstance(json_doc, str):
            import json
            json_doc = json.loads(json_doc)
            
        if isinstance(json_doc, dict) and 'models' in json_doc:
            return [model['name'] for model in json_doc['models']]
        return []
    except:
        return []

# Add new column with model names
df_releases['culture_extractor_models'] = df_releases['culture_extractor_release_json_document'].apply(extract_model_names)

# If you want the names as a comma-separated string instead of a list
df_releases['culture_extractor_models_string'] = df_releases['culture_extractor_models'].apply(lambda x: ', '.join(x) if x else '')

df_releases = df_releases[['culture_extractor_release_uuid', 'culture_extractor_release_date', 'culture_extractor_release_name', 'culture_extractor_models', 'culture_extractor_models_string']]


In [313]:
target_uuid = '01928448-5251-716f-8499-d5c7a99a1e48'
df_releases[df_releases['culture_extractor_release_uuid'].astype(str) == target_uuid]

Unnamed: 0,culture_extractor_release_uuid,culture_extractor_release_date,culture_extractor_release_name,culture_extractor_models,culture_extractor_models_string


In [310]:
# Copy culture_extractor_release_uuid from releases which have a specific model name
model_name = "Marria Leeah"
df_releases_with_model_name = df_releases[df_releases['culture_extractor_models_string'].str.contains(model_name)]
df_releases_with_model_name = df_releases_with_model_name[['culture_extractor_release_uuid', 'culture_extractor_release_name', 'culture_extractor_models_string']]
df_releases_with_model_name.columns = ['release_uuid', 'release_name', 'model_name']

df_releases_with_model_name

Unnamed: 0,release_uuid,release_name,model_name
1665,01928905-8770-7594-a564-f62d835799c3,Flowers in the hair,Marria Leeah
1666,01928905-d04c-71e9-9892-02f5c52924e7,Submissive,Marria Leeah
1669,01928906-b02e-757b-b3a4-c4f98264408a,Busty,Marria Leeah
1671,01928907-34c1-7051-abd7-d3fd67d03700,Caramba,Marria Leeah
1672,01928907-7fca-7312-b610-185a6bda7fb8,On stage,Marria Leeah


In [311]:
df_releases_with_model_name["release_uuid"].to_clipboard(index=False, header=False, sep='|')

In [14]:
with psycopg.connect(connection_string) as conn:
    # Create a cursor object
    cursor = conn.cursor()

    # Execute the query
    query = """
        SELECT sub_sites.*, sites.name AS site_name
        FROM sub_sites
        JOIN sites ON sub_sites.site_uuid = sites.uuid
        ORDER BY sub_sites.name
    """
    cursor.execute(query)

    # Fetch all the results
    results = cursor.fetchall()

    # Get the column names from cursor.description
    column_names = [desc[0] for desc in cursor.description]

    # Close the cursor
    cursor.close()

    # No need to manually close the connection when using 'with'

    # Convert the results to a data frame with column names
    df_subsites = pd.DataFrame(results, columns=column_names)


In [33]:
from libraries.client_stashapp import get_stashapp_client

stash = get_stashapp_client()

def get_parent_studio_id(studio):
    if studio["parent_studio"] is not None:
        return studio["parent_studio"]["id"]
    return None


dUsing stash (v0.27.2-37-g0621d871) endpoint at http://localhost:6969/graphql
dPersisting Connection to Stash with ApiKey...


In [8]:
studios = stash.find_studios({})
df_stash_studios = pd.DataFrame(studios)
df_stash_studios["parent_studio_id"] = df_stash_studios.apply(get_parent_studio_id, axis=1)

# Lookup functions
def get_studio_by_id(studio_id):
    return df_stash_studios[df_stash_studios["id"] == studio_id]

def get_studio_by_name(studio_name):
    return df_stash_studios[df_stash_studios["name"] == studio_name]

In [17]:
# Create new dataframes with prefixed column names
df_sites_prefixed = df_sites.add_prefix('culture_extractor_')
df_stash_studios_prefixed = df_stash_studios.add_prefix('stash_')

# Merge the prefixed dataframes
df_matched_studios = pd.merge(df_stash_studios_prefixed, df_sites_prefixed, 
                              left_on='stash_name', 
                              right_on='culture_extractor_name', 
                              how='inner')

In [None]:
# Store Culture Extractor UUID in Stash studio
name = "SexArt"

df_matched_studio = df_matched_studios[df_matched_studios["stash_name"] == name]
if not df_matched_studio.empty:
    df_matched_studio = df_matched_studio.iloc[0]
else:
    print(f"No studio found with name: {name}")
    raise ValueError(f"No studio found with name: {name}")

refreshed_studio = stash.find_studio(name)
assert refreshed_studio is not None, f"No studio found with name: {name}"

stashbox_ids = refreshed_studio["stash_ids"]
culture_extractor_endpoint = "https://culture.extractor/graphql"
culture_extractor_uuid = str(df_matched_studio["culture_extractor_uuid"])
existing_stash_id = next((stash_id for stash_id in stashbox_ids if stash_id["endpoint"] == culture_extractor_endpoint), None)

if existing_stash_id:
    if existing_stash_id["stash_id"] != culture_extractor_uuid:
        existing_stash_id["stash_id"] = culture_extractor_uuid
        stash.update_studio({"id": refreshed_studio["id"], "stash_ids": stashbox_ids})
        print(f"Updated stash_id for {culture_extractor_endpoint}")
else:
    stashbox_ids.append({"endpoint": culture_extractor_endpoint, "stash_id": culture_extractor_uuid})
    stash.update_studio({"id": refreshed_studio["id"], "stash_ids": stashbox_ids})
    print(f"Added new stash_id for {culture_extractor_endpoint}")


In [None]:
# Get studio for scene matching
stash_site_name = 'Slayed'

current_studio = get_studio_by_name(stash_site_name)

if isinstance(current_studio, pd.DataFrame) and not current_studio.empty:
    current_studio = current_studio.iloc[0].to_dict()

culture_extractor_site_uuid = None
if isinstance(current_studio, dict) and "stash_ids" in current_studio:
    for stash_id in current_studio["stash_ids"]:
        if isinstance(stash_id, dict) and stash_id.get("endpoint") == "https://culture.extractor/graphql":
            culture_extractor_site_uuid = stash_id.get("stash_id")
            break

assert culture_extractor_site_uuid is not None, f"No Culture Extractor site uuid found for {stash_site_name}"
print(f"Matched Stash studio {stash_site_name} to Culture Extractor site uuid {culture_extractor_site_uuid}")


In [130]:
delete_uuid = "018b8e83-e2e3-718e-966d-c4f745149c79"

with psycopg.connect(connection_string) as conn:
    cursor = conn.cursor()
    cursor.execute("DELETE FROM releases WHERE uuid = %s", (delete_uuid,))
    cursor.close()


In [37]:
# Get scenes from Culture Extractor
query_template = """
    SELECT
        sites.uuid AS site_uuid,
        sites.short_name AS site_short_name,
        sites.name AS site_name,
        
        releases.uuid AS release_uuid,
        releases.release_date AS release_date,
        releases.short_name AS release_short_name,
        releases.name AS release_name,
        releases.url AS release_url,
        releases.json_document AS release_json_document,
        downloads.uuid AS downloads_uuid,
        downloads.downloaded_at AS downloads_downloaded_at,
        downloads.variant AS downloads_variant,
        downloads.file_type AS downloads_file_type,
        downloads.content_type AS downloads_content_type,
        downloads.file_metadata AS downloads_file_metadata
    FROM releases
    JOIN sites ON releases.site_uuid = sites.uuid
    JOIN downloads ON releases.uuid = downloads.release_uuid
    WHERE
        sites.uuid = '%s' AND
        downloads.file_type = 'video' AND
        downloads.content_type = 'scene' AND
        (downloads.variant NOT IN ('480x270', '270p', '320p', '360p', '270p MOBILE'));
    """
query = query_template % culture_extractor_site_uuid

with psycopg.connect(connection_string) as conn:
    cursor = conn.cursor()
    cursor.execute(query)
    results = cursor.fetchall()
    column_names = [desc[0] for desc in cursor.description]
    cursor.close()

df_culture_extractor_scenes = pd.DataFrame(results, columns=column_names)
df_culture_extractor_scenes["culture_extractor_duration"] = df_culture_extractor_scenes["downloads_file_metadata"].apply(lambda x: x["duration"] if isinstance(x, dict) and "duration" in x else None)
df_culture_extractor_scenes["culture_extractor_phash"] = df_culture_extractor_scenes["downloads_file_metadata"].apply(lambda x: x["phash"] if isinstance(x, dict) and "phash" in x else None)
df_culture_extractor_scenes

NameError: name 'culture_extractor_site_uuid' is not defined

In [None]:
df_nonunique_release_short_name = df_culture_extractor_scenes[df_culture_extractor_scenes.duplicated(subset=['release_short_name'], keep=False)]
df_nonunique_release_short_name


In [55]:
# Get scenes from Stash
stash_scenes = stash.find_scenes(
    # {
    #     "studios": { "value": [current_studio["id"]], "excludes": [], "modifier": "INCLUDES", "depth": -1 }
    # },
    # filter={
    #     "per_page": 500, "page": 1, "sort": "path", "direction": "DESC"
    # },
    fragment="id title code date files { id path basename fingerprints { type value } format width height video_codec frame_rate duration } studio { id name tags { id name } } performers { id name gender tags { id name} } stash_ids { endpoint stash_id } urls"
)
df_stash_scenes = pd.DataFrame(stash_scenes)

def get_endpoint_stash_id(stash_ids, endpoint):
    for stash_id in stash_ids:
        if stash_id["endpoint"] == endpoint:
            return stash_id["stash_id"]
    return None

def get_tpdb_id(stash_ids):
    return get_endpoint_stash_id(stash_ids, "https://theporndb.net/graphql")

def get_stashdb_id(stash_ids):
    return get_endpoint_stash_id(stash_ids, "https://stashdb.org/graphql")

def get_culture_extractor_id(stash_ids):
    return get_endpoint_stash_id(stash_ids, "https://culture.extractor/graphql")

df_stash_scenes["date"] = pd.to_datetime(df_stash_scenes["date"])
df_stash_scenes["stashdb_id"] = df_stash_scenes["stash_ids"].apply(get_stashdb_id)
df_stash_scenes["tpdb_id"] = df_stash_scenes["stash_ids"].apply(get_tpdb_id)
df_stash_scenes["culture_extractor_id"] = df_stash_scenes["stash_ids"].apply(get_culture_extractor_id)
df_stash_scenes["stash_duration"] = df_stash_scenes["files"].apply(lambda x: x[0]["duration"])
df_stash_scenes["stash_phash"] = df_stash_scenes["files"].apply(lambda x: next((y["value"] for y in x[0]["fingerprints"] if y["type"] == "phash"), None))
df_stash_scenes["stash_oshash"] = df_stash_scenes["files"].apply(lambda x: next((y["value"] for y in x[0]["fingerprints"] if y["type"] == "oshash"), None))


In [None]:
# Check for duplicate scenes based on StashDB ID
df_duplicate_stash_scenes = df_stash_scenes[
    (df_stash_scenes['stashdb_id'].notna()) &
    (df_stash_scenes.duplicated(subset=['stashdb_id'], keep=False))
]
df_duplicate_stash_scenes


In [56]:
# Add the duplicate tag to the scenes
duplicate_stashdb_ids_tag = stash.find_tag("StashDB: Duplicate Scenes Based On ID")
duplicate_stashdb_ids_tag

for index, row in df_duplicate_stash_scenes.iterrows():
    refreshed_scene = stash.find_scene(row["id"])
    existing_tag_ids = [tag["id"] for tag in refreshed_scene["tags"]]
    if duplicate_stashdb_ids_tag["id"] not in existing_tag_ids:
        updated_tag_ids = existing_tag_ids + [duplicate_stashdb_ids_tag["id"]]
        stash.update_scene({ "id": row["id"], "tag_ids": updated_tag_ids })


# Matching existing scenes

In [56]:
# Match Stash and Culture Extractor scenes based on oshash
df_merged_by_oshash = pd.merge(df_stash_scenes, df_releases, 
                               left_on='stash_oshash', right_on='culture_extractor_oshash', 
                               how='inner')

df_merged_by_oshash = df_merged_by_oshash[
    df_merged_by_oshash['culture_extractor_id'].str.strip() != df_merged_by_oshash['culture_extractor_release_uuid'].astype(str).str.strip()
]
len(df_merged_by_oshash)


4432

In [57]:
# Match first x rows from df_merged_by_oshash to df_stash_scenes
for index, row in df_merged_by_oshash[df_merged_by_oshash['culture_extractor_site_name'] == "It's POV"].iterrows():
    refreshed_scene = stash.find_scene(row["id"])

    release_uuid = row["culture_extractor_release_uuid"]
    if pd.isnull(release_uuid):
        continue

    existing_stash_ids = refreshed_scene["stash_ids"]
    if "https://culture.extractor/graphql" not in [stash_id_obj["endpoint"] for stash_id_obj in existing_stash_ids]:
        existing_stash_ids.append({ "endpoint": "https://culture.extractor/graphql", "stash_id": str(release_uuid) })
        id = row["id"]
        stash.update_scene({ "id": id, "stash_ids": existing_stash_ids })
        print(f"Updated scene {id} with Culture Extractor ID {release_uuid}")


Updated scene 22834 with Culture Extractor ID 01908909-0d7b-76fe-9b1d-2eb0fe73c508
Updated scene 22722 with Culture Extractor ID 01908908-226a-774b-8ec1-414117cde2f5
Updated scene 22790 with Culture Extractor ID 0190890f-154a-7600-a6b0-ef352b6a0179
Updated scene 22793 with Culture Extractor ID 01908946-6fe7-746d-a8f0-7a5b7029672d
Updated scene 22796 with Culture Extractor ID 01908896-8922-736e-9f0b-e64317202ccf
Updated scene 22840 with Culture Extractor ID 019088a2-b487-76dd-981a-5fffec07ab6c
Updated scene 22848 with Culture Extractor ID 01908893-e90a-7116-a89d-a79fcb801b26
Updated scene 23101 with Culture Extractor ID 01908919-761a-764c-a9b2-6d8d485c1305
Updated scene 23102 with Culture Extractor ID 0190890e-4aec-73ba-bda6-0eea9383ded2
Updated scene 23192 with Culture Extractor ID 0190893b-7adf-7028-a0ad-3c62fe846620
Updated scene 23337 with Culture Extractor ID 01908945-a8ac-76d7-a895-b7e87c9afff7
Updated scene 22670 with Culture Extractor ID 01908939-3b18-7255-81bd-c18225a28112
Upda

In [135]:
# Match Stash and Culture Extractor scenes based on phash
df_culture_extractor_scenes['release_date'] = pd.to_datetime(df_culture_extractor_scenes['release_date'])

# Merge the dataframes
df_merged_scenes = pd.merge(df_stash_scenes, df_culture_extractor_scenes, 
                            left_on='stash_phash', right_on='culture_extractor_phash', 
                            how='left')

df_merged_matched_scenes = df_merged_scenes.copy()
df_merged_matched_scenes = df_merged_matched_scenes[df_merged_matched_scenes["release_uuid"].notnull()]

df_merged_matched_scenes["duration_difference"] = df_merged_matched_scenes["stash_duration"] - df_merged_matched_scenes["culture_extractor_duration"]
df_merged_matched_scenes["phash_distance"] = df_merged_matched_scenes.apply(lambda row: calculate_hamming_distance(row["stash_phash"], row["culture_extractor_phash"]), axis=1)
df_merged_matched_scenes["title_levenshtein"] = df_merged_matched_scenes.apply(lambda row: levenshtein(row["title"], row["release_name"]), axis=1)

In [None]:
# Update Stash scenes with Culture Extractor ID
for index, row in df_merged_matched_scenes.iterrows():
    refreshed_scene = stash.find_scene(row["id"])

    release_uuid = row["release_uuid"]
    if pd.isnull(release_uuid):
        continue

    existing_stash_ids = refreshed_scene["stash_ids"]
    if "https://culture.extractor/graphql" not in [stash_id_obj["endpoint"] for stash_id_obj in existing_stash_ids]:
        existing_stash_ids.append({ "endpoint": "https://culture.extractor/graphql", "stash_id": str(release_uuid) })
        id = row["id"]
        code = row["release_short_name"]
        stash.update_scene({ "id": id, "code": code, "stash_ids": existing_stash_ids })
        print(f"Updated scene {id} with Culture Extractor ID {release_uuid}")


In [None]:
# Create a new column 'new_urls' where 'release_url' is appended to 'urls' if not already present
df_merged_scenes['new_urls'] = df_merged_scenes.apply(lambda row: row['urls'] + [row['release_url']] if row['release_url'] not in row['urls'] else row['urls'], axis=1)

df_merged_scenes[["id", "title", "new_urls"]]

In [None]:
# Create a new column 'new_urls' where 'release_url' is appended to 'urls' if not already present
df_merged_scenes['new_stash_ids'] = df_merged_scenes.apply(lambda row:
                                                           row['stash_ids'] + [{ "endpoint": "https://culture.extractor/graphql", "stash_id": str(row["release_uuid"]) }] if "https://culture.extractor/graphql" not in [stash_id_obj["endpoint"] for stash_id_obj in row['stash_ids']] else row['stash_ids'],
                                                           axis=1)
df_merged_scenes[["new_stash_ids"]].values


In [None]:
for index, row in df_merged_scenes.iterrows():
    id = row["id"]
    old_urls = row["urls"]
    new_urls = row["new_urls"]
    old_urls_set = set(old_urls)
    new_urls_set = set(new_urls)
    if old_urls_set != new_urls_set:
        print(row["title"])
        print(old_urls_set)
        print(new_urls_set)
        stash.update_scene({ "id": id, "urls": new_urls })

    # stash_ids = row["stash_ids"]
    # new_stash_ids = row["new_stash_ids"]
    # stash_ids_set = set(stash_id_obj["stash_id"] for stash_id_obj in row["stash_ids"])
    # new_stash_ids_set = set(stash_id_obj["stash_id"] for stash_id_obj in row["new_stash_ids"])
    # if stash_ids_set != new_stash_ids_set:
    #     print(row["title"])
    #     print(stash_ids_set)
    #     print(new_stash_ids_set)
    #     stash.update_scene({ "id": id, "stash_ids": new_stash_ids })

# Import new scenes

In [23]:
import os
import pandas as pd
from pathlib import Path
import re

# Define the root directory
root_dir = Path(r"F:\Ripping\Tickling Submission\Metadata")

# Initialize lists to store data
data = []

# Walk through the directory structure
for release_dir in root_dir.iterdir():
    if release_dir.is_dir():
        release_uuid = release_dir.name
        preview_image = None
        full_scene = None
        trailer = None

        # Check each file in the release directory
        for file in release_dir.iterdir():
            if file.suffix.lower() == '.jpg':
                preview_image = file.name
            elif file.suffix.lower() in ['.wmv', '.mp4']:
                try:
                    file_stem = file.stem  # Get filename without extension
                    if file_stem.endswith(release_uuid):
                        trailer = file.name
                    elif re.search(r'- \d+x\d+$', file_stem):
                        full_scene = file.name
                except OSError as e:
                    print(f"Error accessing file: {file}. Error: {e}")
                    continue

        # Append data to the list
        data.append({
            'release_uuid': release_uuid,
            'preview_image': preview_image,
            'full_scene': full_scene,
            'trailer': trailer
        })

# Create a DataFrame
df_files = pd.DataFrame(data)


In [24]:
df_merged = pd.merge(df_files, df_culture_extractor_scenes, 
                     left_on='release_uuid', 
                     right_on='release_uuid', 
                     how='left')


In [None]:
# Get release_uuid values where full_scene is missing
missing_full_scene = df_merged[df_merged['full_scene'].isnull()]['release_uuid'].tolist()

# Format the list for VS Code breakpoint condition
breakpoint_condition = f"release_id in {missing_full_scene}"

print("VS Code breakpoint condition:")
print(breakpoint_condition)


In [None]:
len(missing_full_scene)

# Trailers

In [None]:
studio_name = "Vixen"

studio_for_trailers = stash.find_studio(studio_name)
scenes_for_trailers = stash.find_scenes(
    {
        "studios": {
            "value": [studio_for_trailers["id"]],
            "excludes": [],
            "modifier": "INCLUDES",
            "depth": -1
        }
    },
    fragment="id title code date files { id path basename fingerprints { type value } format width height video_codec frame_rate duration } studio { id name tags { id name } } performers { id name gender tags { id name} } stash_ids { endpoint stash_id } urls"
)

df_stash_scenes_for_trailers = pd.DataFrame(scenes_for_trailers)
df_stash_scenes_for_trailers["stash_oshash"] = df_stash_scenes_for_trailers["files"].apply(lambda x: next((y["value"] for y in x[0]["fingerprints"] if y["type"] == "oshash"), None))
df_stash_scenes_for_trailers["culture_extractor_uuid"] = df_stash_scenes_for_trailers["stash_ids"].apply(lambda x: get_culture_extractor_id(x))

In [None]:
trailer_tag = stash.find_tag("Trailer Associated", create=True)


In [None]:
import shutil

# Source paths
culture_extractor_trailer_base_path_d = f"D:\\Ripping\\{studio_name}\\Metadata\\"
culture_extractor_trailer_base_path_f = f"F:\\Ripping\\{studio_name}\\Metadata\\"

# Target path
stash_trailer_base_path = "H:\\Stash\\generated\\trailers"

for index, row in df_stash_scenes_for_trailers.iterrows():
    scene_id = row["id"]
    scene_oshash = row["stash_oshash"]
    culture_extractor_uuid = row["culture_extractor_uuid"]

    stash_trailer_path = os.path.join(stash_trailer_base_path, f"{scene_oshash}.mp4")
    if os.path.exists(stash_trailer_path):
        refreshed_scene = stash.find_scene(scene_id)
        existing_tag_ids = [tag["id"] for tag in refreshed_scene["tags"]]
        if trailer_tag["id"] not in existing_tag_ids:
            updated_tag_ids = existing_tag_ids + [trailer_tag["id"]]
            stash.update_scene({ "id": scene_id, "tag_ids": updated_tag_ids })
        
        print(f"Stash: Trailer already exists for scene {scene_id} at {stash_trailer_path}")
        continue
    
    trailer_filename_candidates = ["trailer_2160.mp4", "trailer_1080.mp4"]
    trailer_candidate_paths_d = [os.path.join(culture_extractor_trailer_base_path_d, f"{culture_extractor_uuid}", filename) for filename in trailer_filename_candidates]
    trailer_candidate_paths_f = [os.path.join(culture_extractor_trailer_base_path_f, f"{culture_extractor_uuid}", filename) for filename in trailer_filename_candidates]    
    matching_trailer_path = next((path for path in trailer_candidate_paths_d + trailer_candidate_paths_f if os.path.exists(path)), None)
    if matching_trailer_path:
        shutil.copy(matching_trailer_path, stash_trailer_path)
        print(f"Copied trailer for scene {scene_id} to {stash_trailer_path}")
        refreshed_scene = stash.find_scene(scene_id)
        existing_tag_ids = [tag["id"] for tag in refreshed_scene["tags"]]
        if trailer_tag["id"] not in existing_tag_ids:
            updated_tag_ids = existing_tag_ids + [trailer_tag["id"]]
            stash.update_scene({ "id": scene_id, "tag_ids": updated_tag_ids })
    else:
        print(f"No trailer found for scene {scene_id}")
        continue
