In [1]:
import os
import praw
import dotenv

# Load variables from the local .env file so the os.getenv lookups below can see them.
dotenv.load_dotenv()

# PRAW client used by the download helpers; every credential comes from the environment.
reddit = praw.Reddit(
    client_id=os.getenv("REDDIT_CLIENT_ID"),
    client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
    username=os.getenv("REDDIT_CLIENT_USERNAME"),
    password=os.getenv("REDDIT_CLIENT_PASSWORD"),
    user_agent=os.getenv("REDDIT_CLIENT_USER_AGENT"),
)

In [None]:
# Create Stash app client
import dotenv

from libraries.client_stashapp import get_stashapp_client

# Load variables from the local .env file (presumably where the Stash connection
# settings live — confirm against libraries.client_stashapp).
dotenv.load_dotenv()

# NOTE(review): "AURAL_" looks like an environment-variable prefix that selects which
# Stash instance to connect to — verify in get_stashapp_client.
stash = get_stashapp_client("AURAL_")

In [6]:
# Use Beautiful Soup to find all external URLs in the HTML
from bs4 import BeautifulSoup
import json
import os
from datetime import datetime
import re
import requests
import subprocess

# Integer codes used to classify each hyperlink found in a post body.
link_source_enum = dict(unknown=0, audio=1, script=2, reddit_script=3)
# Integer codes describing the kind of file recorded in the saved JSON manifest.
file_type_enum = dict(audio=1, script=2)

# Folder that receives every downloaded file; created up front when missing.
save_path = "F:\\GWA"
os.makedirs(save_path, exist_ok=True)


def download_reddit_post(url):
    """Download a Reddit post together with its linked audio and script files.

    The submission is fetched through the notebook-level ``reddit`` client.
    Links classified as audio are downloaded when they point at soundgasm.net,
    links classified as script when they point at scriptbin.works. A manifest
    named ``<base_filename>.json`` is then written to ``save_path``.

    Args:
        url: URL of the Reddit post.

    Returns:
        dict with the keys ``url``, ``reddit`` (the extracted post metadata)
        and ``files`` (one ``{"type", "filepath", "data"}`` entry per
        downloaded file, audio entries first).

    Raises:
        ValueError: propagated from the helpers, e.g. when no post ID can be
            extracted from ``url``.
    """
    submission = reddit.submission(id=extract_post_id(url))
    post_data = extract_post_data(submission)

    def urls_for(kind, host):
        # URLs of the post's links that are classified as `kind` and mention `host`.
        return [
            link["url"]
            for link in post_data["links"]
            if link["type"] == link_source_enum[kind] and host in link["url"]
        ]

    downloaded = []

    for audio_url in urls_for("audio", "soundgasm.net"):
        audio_filepath, audio_data = download_soundgasm_audio(post_data, audio_url)
        downloaded.append({
            "type": file_type_enum["audio"],
            "filepath": audio_filepath,
            "data": audio_data,
        })

    for script_url in urls_for("script", "scriptbin.works"):
        script_filepath, script_data = download_scriptbin_script(post_data, script_url)
        downloaded.append({
            "type": file_type_enum["script"],
            "filepath": script_filepath,
            "data": script_data,
        })

    manifest = {"url": url, "reddit": post_data, "files": downloaded}

    # The manifest sits next to the downloaded files and shares their base filename.
    manifest_path = os.path.join(save_path, post_data["base_filename"] + ".json")
    with open(manifest_path, "w", encoding="utf-8") as manifest_file:
        json.dump(manifest, manifest_file, indent=2)

    return manifest

def extract_post_id(url):
    """Return the Reddit post ID embedded in ``url``.

    Two URL shapes are tried in order: ``/comments/<id>/`` and
    ``reddit.com/r/<subreddit>/<id>``. The ID is a run of lowercase letters
    and digits.

    Raises:
        ValueError: if neither shape is found in ``url``.
    """
    candidate_patterns = (
        r"/comments/([a-z0-9]+)/?",
        r"reddit\.com/r/[^/]+/([a-z0-9]+)/?",
    )

    # Lazily try each pattern and keep the first one that hits.
    attempts = (re.search(pattern, url) for pattern in candidate_patterns)
    found = next((hit for hit in attempts if hit), None)

    if found is None:
        raise ValueError("Could not extract post ID from URL")
    return found.group(1)


def extract_post_data(post):
    """Collect the metadata of a Reddit submission into a plain dict.

    Besides the submission's own fields, the result carries ``base_filename``
    (built by ``format_filename``) and ``links`` (every hyperlink found in the
    post's HTML body). When exactly one link is classified as a Reddit script
    post, that post is fetched too: its script-type links are appended to
    ``links`` and its metadata is stored under ``reddit_script``.

    Args:
        post: the submission object to read (as returned by
            ``reddit.submission``).

    Returns:
        dict describing the post.

    Raises:
        ValueError: if more than one Reddit script link is found, or if the
            single script link does not contain a post ID.
    """
    # `post.author` is None when the account was deleted. Resolve the fallback once
    # so the filename is built from the same value as the "author" entry instead of
    # dereferencing None.
    author = post.author
    author_name = author.name if author else "[deleted]"
    author_fullname = author.fullname if author else "[deleted]"

    post_data = {
        "base_filename": format_filename(author_name, post.created_utc, post.id),
        "title": post.title,
        "author": {
            "name": author_name,
            "fullname": author_fullname,
            "flair": post.author_flair_text,
        },
        "url": post.url,
        "created_utc": post.created_utc,
        "id": post.id,
        "permalink": f"https://reddit.com{post.permalink}",
        "score": post.score,
        "subreddit": post.subreddit.display_name,
        "selftext": post.selftext,
        "selftext_html": post.selftext_html,
        "links": extract_post_links(post.selftext_html),
    }

    reddit_script_links = [
        link for link in post_data["links"]
        if link["type"] == link_source_enum["reddit_script"]
    ]
    if len(reddit_script_links) > 1:
        raise ValueError("Multiple Reddit script links found in post")

    if reddit_script_links:
        reddit_script_id = extract_post_id(reddit_script_links[0]["url"])
        reddit_script_data = extract_reddit_script_data(reddit_script_id)

        # Surface the script post's own script links so the caller downloads them
        # as if they had been linked from the audio post directly.
        post_data["links"].extend(
            script_link
            for script_link in reddit_script_data["links"]
            if script_link["type"] == link_source_enum["script"]
        )

        post_data["reddit_script"] = reddit_script_data

    return post_data

def extract_reddit_script_data(post_id):
    """Fetch the Reddit post holding a script and return its metadata as a dict.

    Args:
        post_id: ID of the submission to fetch through the notebook-level
            ``reddit`` client.

    Returns:
        dict with the post's title, author details, URLs, timestamps, score,
        subreddit, body text/HTML and the links found in the HTML body.
    """
    script_post = reddit.submission(id=post_id)

    # A missing author is recorded with the "[deleted]" placeholder.
    poster = script_post.author
    if poster:
        author_info = {"name": poster.name, "fullname": poster.fullname}
    else:
        author_info = {"name": "[deleted]", "fullname": "[deleted]"}
    author_info["flair"] = script_post.author_flair_text

    body_html = script_post.selftext_html

    return {
        "title": script_post.title,
        "author": author_info,
        "url": script_post.url,
        "created_utc": script_post.created_utc,
        "id": script_post.id,
        "permalink": f"https://reddit.com{script_post.permalink}",
        "score": script_post.score,
        "subreddit": script_post.subreddit.display_name,
        "selftext": script_post.selftext,
        "selftext_html": body_html,
        "links": extract_post_links(body_html),
    }

def extract_post_links(html):
    """Return every hyperlink found in a post's HTML body.

    Args:
        html: HTML of the post body. May be None or empty, which is what a
            post without a text body provides.

    Returns:
        list of ``{"url", "text", "type"}`` dicts, one per ``<a>`` element
        that has an ``href``; ``type`` comes from ``determine_link_type``.
        An empty list when there is no HTML to parse.
    """
    # Posts without a text body have no HTML; BeautifulSoup rejects None, so
    # answer "no links" instead of crashing.
    if not html:
        return []

    soup = BeautifulSoup(html, "html.parser")

    links = []
    for anchor in soup.find_all("a", href=True):
        href = anchor.get("href")
        links.append({
            "url": href,
            "text": anchor.text,
            "type": determine_link_type(anchor.text, href),
        })
    return links

def determine_link_type(text, url):
    """Classify a hyperlink by its URL and visible text.

    The URL is checked first (case-insensitively): a URL containing
    "soundgasm" is audio, one containing "scriptbin" is a script. Otherwise a
    link whose text contains "script" is taken to point at a Reddit script
    post. Anything else is unknown.

    Returns:
        The matching integer code from ``link_source_enum``.
    """
    url_upper = url.upper()
    for marker, kind in (("SOUNDGASM", "audio"), ("SCRIPTBIN", "script")):
        if marker in url_upper:
            return link_source_enum[kind]

    # The link text is only consulted when the URL itself was inconclusive.
    kind = "reddit_script" if "SCRIPT" in text.upper() else "unknown"
    return link_source_enum[kind]

def format_filename(author, created_utc, post_id):
    """Build the base filename ``<author>_<YYYY-MM-DD>_<post_id>`` for a post.

    Args:
        author: author name; every character that is not alphanumeric, "-"
            or "_" is dropped.
        created_utc: creation time of the post as a Unix timestamp.
        post_id: Reddit post ID, used verbatim.

    Returns:
        The filename stem, without extension.
    """
    # Local import: the notebook's import cell only brings in `datetime` itself.
    from datetime import timezone

    # Convert in UTC explicitly. Without `tz`, fromtimestamp() uses the machine's
    # local timezone, so the date in the filename would depend on where the
    # notebook runs.
    date_str = datetime.fromtimestamp(created_utc, tz=timezone.utc).strftime("%Y-%m-%d")

    # Keep only characters that are safe in a filename.
    safe_author = "".join(c for c in author if c.isalnum() or c in ("-", "_"))
    return f"{safe_author}_{date_str}_{post_id}"

def determine_performer_gender(author_flair):
    """Derive the performer's gender from the author's flair text.

    Only flairs containing "female" (in any letter case) are recognised.

    Args:
        author_flair: flair text of the post author; may be None or empty
            when the author has no flair.

    Returns:
        The string "FEMALE".

    Raises:
        ValueError: if the flair is missing or does not contain "female".
    """
    # A missing flair is reported like any other unrecognised flair, rather than
    # failing with AttributeError on `None.upper()`.
    if author_flair and "FEMALE" in author_flair.upper():
        return "FEMALE"
    raise ValueError(f"Unknown gender for author flair: {author_flair}")

def convert_audio_to_video(input_file, output_file, image_path="X:\\gwa.png", ffmpeg_path="ffmpeg"):
    """Wrap an audio file in a video that shows a single still image.

    Runs ffmpeg with the still image looped as the video stream (encoded with
    libx264 and scaled to 256x256) and the audio stream copied as-is. The
    output ends with the shortest input stream.

    Args:
        input_file: path of the audio file to wrap.
        output_file: path of the video file to create.
        image_path: still image used as the video track. Defaults to the
            cover image this notebook has always used.
        ffmpeg_path: ffmpeg executable to run. Defaults to whatever "ffmpeg"
            resolves to on PATH.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    command = [
        ffmpeg_path,
        "-loop", "1",          # repeat the still image for the whole duration
        "-i", image_path,
        "-i", input_file,
        "-c:v", "libx264",
        "-c:a", "copy",        # keep the original audio stream untouched
        "-shortest",           # stop when the shortest input stream ends
        "-vf", "scale=256:256",
        output_file,
    ]
    subprocess.run(command, check=True)

def download_soundgasm_audio(post_data, url):
    """Download a Soundgasm recording, wrap it in an .mp4 and collect metadata.

    Steps: fetch the Soundgasm page, locate the media URL (.m4a or .mp3) in
    its source, download the audio into ``save_path``, convert it to a video
    with ``convert_audio_to_video``, delete the raw audio file, run the
    videohashes tool on the video, and write the metadata to
    ``<audio path>.json``.

    Args:
        post_data: post metadata; only ``post_data["base_filename"]`` is read.
        url: URL of the Soundgasm page.

    Returns:
        ``(converted_filepath, metadata)`` where ``metadata`` has the keys
        ``soundgasm`` (media URL, title, description) and ``videohashes``
        (the parsed JSON output of the videohashes tool).

    Raises:
        ValueError: if ``url`` is not a Soundgasm URL or the page contains no
            media URL.
        requests.HTTPError: if the media download returns an error status.
        subprocess.CalledProcessError: if the ffmpeg conversion fails.
        AssertionError: if the videohashes tool exits with a non-zero status.
    """
    if "soundgasm.net" not in url:
        raise ValueError(f"URL is not a Soundgasm URL: {url}")

    # Get and parse the soundgasm page
    response = requests.get(url)
    soundgasm_soup = BeautifulSoup(response.content, 'html.parser')

    # The media URL is embedded in the page source rather than in an <audio> element
    media_url_match = re.search(r'(https://media\.soundgasm\.net/sounds/[a-zA-Z0-9]+\.(?:m4a|mp3))', response.text)
    if not media_url_match:
        raise ValueError(f"No audio element found for {url}")
    media_url = media_url_match.group(1)

    # Get filename from URL
    filename = os.path.basename(media_url)
    filepath = os.path.join(save_path, post_data["base_filename"] + "_" + filename)

    # Download the audio file. Fail here on an HTTP error instead of saving the
    # error page as if it were audio.
    audio_response = requests.get(media_url)
    audio_response.raise_for_status()
    with open(filepath, "wb") as f:
        f.write(audio_response.content)

    # Swap the extension whatever it is. Replacing only ".m4a" left .mp3 paths
    # unchanged, so ffmpeg would read and write the same file and the result
    # would then be deleted by os.remove below.
    converted_filepath = os.path.splitext(filepath)[0] + ".mp4"
    convert_audio_to_video(filepath, converted_filepath)
    os.remove(filepath)

    process = subprocess.run(
        ['C:\\Tools\\videohashes-windows-amd64.exe', '-json', '-md5', converted_filepath],
        capture_output=True,  # Captures both stdout and stderr
        text=True  # Returns strings instead of bytes
    )
    assert process.returncode == 0, f"Failed to run videohashes: {process.stderr}"
    videohashes_data = json.loads(process.stdout)

    # Title and description are optional on the page; missing elements become None
    soundgasm_title_element = soundgasm_soup.find('div', {'class': 'jp-title'})
    soundgasm_title = soundgasm_title_element.text.strip() if soundgasm_title_element else None
    soundgasm_description_element = soundgasm_soup.find('div', {'class': 'jp-description'})
    soundgasm_description = soundgasm_description_element.text.strip() if soundgasm_description_element else None

    metadata = {
        "soundgasm": {
            "media_url": media_url,
            "title": soundgasm_title,
            "description": soundgasm_description
        },
        "videohashes": videohashes_data
    }

    # The metadata file keeps the name of the original audio file plus ".json"
    with open(f"{filepath}.json", "w") as f:
        json.dump(metadata, f)

    return (converted_filepath, metadata)
        
def download_scriptbin_script(post_data, url):
    """Download a script page from Scriptbin and save it as JSON.

    The request is sent with session cookies. The three credential cookies
    are read from the environment (e.g. from the .env file the notebook
    already loads) rather than being stored in the notebook:

    * ``SCRIPTBIN_COOKIE_SBW_AF``  -> cookie ``sbw.af``
    * ``SCRIPTBIN_COOKIE_AUTH``    -> cookie ``auth``
    * ``SCRIPTBIN_COOKIE_SBW_SES`` -> cookie ``sbw.ses``

    Args:
        post_data: post metadata; only ``post_data["base_filename"]`` is read.
        url: URL of the Scriptbin page.

    Returns:
        ``(filepath, script)`` where ``script["scriptbin"]`` holds the source
        URL and the response text, and ``filepath`` is the
        ``<base_filename>_script.json`` file the same dict was written to.

    Raises:
        RuntimeError: if any of the credential environment variables is unset.
        requests.HTTPError: if Scriptbin answers with an error status.
    """
    # Credentials must not live in source control; the Reddit client in this
    # notebook already takes its secrets from the environment the same way.
    secret_cookie_env = {
        "sbw.af": "SCRIPTBIN_COOKIE_SBW_AF",
        "auth": "SCRIPTBIN_COOKIE_AUTH",
        "sbw.ses": "SCRIPTBIN_COOKIE_SBW_SES",
    }
    secret_cookies = {name: os.getenv(env_name) for name, env_name in secret_cookie_env.items()}
    missing = [secret_cookie_env[name] for name, value in secret_cookies.items() if not value]
    if missing:
        # Fail loudly: without the session cookies the response may not be the script.
        raise RuntimeError(
            "Missing Scriptbin session cookies; set these environment variables: "
            + ", ".join(missing)
        )

    # NOTE(review): "ta" and "authpl" look like plain flags rather than secrets,
    # so they stay inline — confirm.
    cookies = {"ta": "y", "authpl": "1", **secret_cookies}

    session = requests.Session()
    session.cookies.update(cookies)

    scriptbin_content = session.get(url)
    # Do not save an error page as if it were the script.
    scriptbin_content.raise_for_status()

    script = {
        "scriptbin": {
            "script_source_url": url,
            "script_content": scriptbin_content.text
        }
    }

    filepath = os.path.join(save_path, post_data["base_filename"] + "_script.json")
    with open(filepath, "w") as f:
        json.dump(script, f)

    return (filepath, script)


In [None]:
# Example run: download one post — its linked audio, scripts and the JSON manifest —
# into save_path. The returned manifest dict is displayed as the cell output.
args_url = "https://old.reddit.com/r/gonewildaudio/comments/1eo25f0/f4m_girlfriend_gives_a_impromptu_encore/"
download_reddit_post(args_url)

In [91]:
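# NOTE: disabled draft for creating the performer in Stash. It refers to `submission`,
# which only exists as a local variable inside the download helpers, so it cannot run
# at notebook scope as written.
# stash_performer = stash.find_performer({ 
#     "name": submission.author.name,
#     "gender": determine_performer_gender(submission.author_flair_text),
#     "image": submission.author.icon_img
# }, create=True)
# stash_performer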


In [None]:
import subprocess

# Method 1: Capture both stdout and stderr
# FIXME(review): no `process` is assigned at notebook scope. The only variable with
# that name is a local inside download_soundgasm_audio, so on a fresh kernel the
# prints below raise NameError. Restore the missing
# `process = subprocess.run(..., capture_output=True, text=True)` call here,
# or delete this scratch cell.


print("STDOUT:", process.stdout)
print("STDERR:", process.stderr)
