In [None]:
import os
import json
import pandas as pd
import hashlib
import string
import random
from pydub import AudioSegment

# Collect every theme definition (*.json) under ../Themes, including subfolders.
# os.walk yields entries in arbitrary, filesystem-dependent order, so the result is
# sorted to make the list (and json_files[0]) identical on every run.
json_files = sorted(
    os.path.join(root, fname)
    for root, _, files in os.walk(os.path.join("..", "Themes"))
    for fname in files
    if fname.endswith(".json")
)
json_files

In [None]:
def get_audio_file(line_text,audio_dir):
    """Return the path of the MP3 that corresponds to a script line.

    The line is reduced to a key by dropping every ASCII punctuation character
    (``string.punctuation``) and every space, then lower-casing what is left.
    The file name is the SHA-256 hex digest of that key (UTF-8 encoded) plus
    ``.mp3``, joined onto ``audio_dir``.

    Args:
        line_text: The script line to look up.
        audio_dir: Directory that holds the hashed audio files.

    Returns:
        ``audio_dir`` joined with ``<sha256 hex digest>.mp3``. The file is not
        checked for existence.
    """
    ignored_chars = set(string.punctuation) | {' '}
    normalized = ''.join(ch for ch in line_text if ch not in ignored_chars).lower()
    digest = hashlib.sha256(normalized.encode('utf-8')).hexdigest()
    return os.path.join(audio_dir, digest + ".mp3")

# Text is normalized before hashing (case, spaces and ASCII punctuation are ignored)
# to help prevent duplication of audio: both lines below should print the same path.
# The directory is built with os.path.join so the printed path is valid on any OS
# (on Windows it is still "..\audio").
for sample_line in (
    "I trust Master's version of events completely.",
    "i trust Masters version of events completely",
):
    print(get_audio_file(sample_line, os.path.join("..", "audio")))

In [None]:
# Build one "progression demo" MP3 per theme file and per (dominant, subject) pairing.
# Within each MP3 the lines are grouped in the order given by `difficulties` and
# shuffled inside each group. Output files are written to the current directory.
difficulties = ["BASIC", "LIGHT", "MODERATE", "DEEP", "EXTREME"]
dominants = ["Master", "Mistress", None]
subjects = ["Bambi", None]
# Portable spelling of the audio folder; on Windows this is still "..\audio".
audio_dir = os.path.join("..", "audio")

for file in json_files:
    # Theme name = file name without directory or extension. os.path handles the
    # platform's separator, so this no longer fails on paths without a backslash.
    theme = os.path.splitext(os.path.basename(file))[0]
    # Close the handle deterministically and decode as UTF-8 (the JSON standard)
    # instead of the platform default, which garbles non-ASCII characters.
    # NOTE(review): if the audio files were hashed from mis-decoded text, lines with
    # non-ASCII characters will need their audio regenerated - confirm.
    with open(file, encoding="utf-8") as theme_fh:
        theme_data = json.load(theme_fh)
    df = pd.DataFrame(theme_data)

    for dom in dominants:
        for sub in subjects:
            # A line qualifies when it targets this dominant/subject or is generic
            # (null); for the None pairing only generic lines qualify.
            dom_condition = (df["dominant"].isnull()) if dom is None else ((df["dominant"] == dom) | (df["dominant"].isnull()))
            sub_condition = (df["subject"].isnull()) if sub is None else ((df["subject"] == sub) | (df["subject"].isnull()))
            new_final_audio = AudioSegment.silent(duration=0)
            for diff in difficulties:
                lines = df.loc[dom_condition & sub_condition & (df["difficulty"] == diff), "line"].tolist()
                random.shuffle(lines)
                for line_text in lines:
                    audio_file = get_audio_file(line_text, audio_dir)
                    try:
                        segment = AudioSegment.from_mp3(audio_file)
                        new_final_audio += segment
                    except Exception:
                        # Best effort: skip lines whose audio cannot be loaded, but let
                        # KeyboardInterrupt / SystemExit through so the run can be stopped.
                        print(f"Could not find script audio for line: {line_text}")
            new_final_audio.export(f"{theme}_Dom-{dom or 'None'}_Sub-{sub or 'None'}_Progression_Demo.mp3", format="mp3")


**Testing Below**

In [4]:
# Pick the first discovered theme file to experiment with
# (raises IndexError if json_files is empty).
theme_file = json_files[0]
theme_file

In [None]:
# Load the selected theme into a DataFrame for inspection.
# The context manager closes the file handle, and the explicit UTF-8 encoding (the
# JSON standard) avoids the platform default garbling non-ASCII characters.
with open(theme_file, encoding="utf-8") as theme_fh:
    theme_data = json.load(theme_fh)
df = pd.DataFrame(theme_data)
df

In [None]:
# Show the distinct values present in the "difficulty" and "theme" columns.
difficulty_values = df.difficulty.unique()
print(difficulty_values)
theme_values = df.theme.unique()
print(theme_values)

In [8]:
import random
from pydub import AudioSegment

# Scratch run of the export loop against the single theme currently loaded in `df`:
# one MP3 per (dominant, subject) pairing, lines grouped in `difficulties` order and
# shuffled within each group. Output files are written to the current directory.
final_audio = AudioSegment.silent(duration=0)  # NOTE(review): not used below; kept in case another cell reads it
difficulties = ["BASIC", "LIGHT", "MODERATE", "DEEP", "EXTREME"]
dominants = ["Master", "Mistress", None]
subjects = ["Bambi", None]
# Portable spelling of the audio folder; on Windows this is still "..\audio".
audio_dir = os.path.join("..", "audio")
for dom in dominants:
    for sub in subjects:
        # A line qualifies when it targets this dominant/subject or is generic (null);
        # for the None pairing only generic lines qualify.
        dom_condition = (df["dominant"].isnull()) if dom is None else ((df["dominant"] == dom) | (df["dominant"].isnull()))
        sub_condition = (df["subject"].isnull()) if sub is None else ((df["subject"] == sub) | (df["subject"].isnull()))
        new_final_audio = AudioSegment.silent(duration=0)
        for diff in difficulties:
            lines = df.loc[dom_condition & sub_condition & (df["difficulty"] == diff), "line"].tolist()
            random.shuffle(lines)
            for line_text in lines:
                audio_file = get_audio_file(line_text, audio_dir)
                try:
                    segment = AudioSegment.from_mp3(audio_file)
                    new_final_audio += segment
                except Exception:
                    # Best effort: skip lines whose audio cannot be loaded, but let
                    # KeyboardInterrupt / SystemExit through so the run can be stopped.
                    print(f"Could not find script audio for line: {line_text}")
        new_final_audio.export(f"Theme_{dom or 'None'}_{sub or 'None'}_progressive.mp3", format="mp3")


In [None]:
# Compare the audio path for a line with a garbled apostrophe (looks like a curly
# apostrophe that was mis-decoded - TODO confirm) against the plain ASCII version.
# The ASCII apostrophe is stripped as punctuation before hashing; the garbled
# characters are not ASCII punctuation, so the two printed paths differ.
# The directory is built with os.path.join so the printed path is valid on any OS.
for sample_line in (
    "Mistressâ€™s version of events feels more real to Bambi over time.",
    "Mistress's version of events feels more real to Bambi over time.",
):
    print(get_audio_file(sample_line, os.path.join("..", "audio")))