In [None]:
import subprocess
import re
import os
import shutil
import requests
from diffusers import DiffusionPipeline
import torch
from dotenv import load_dotenv

# Load environment variables from the local '.env' file; voice_generation()
# reads ELEVENLABS_API_KEY from the environment.
load_dotenv(dotenv_path='.env')

def voice_generation(dialogue, gender, output_audio_destination_path):
    """
    Generate voice using the Elevenlabs Text-to-Speech API and save the output audio to a destination path.

    The MPEG audio returned by the API is first written to 'output.mp3' in the
    current working directory and then copied to the destination path.

    Parameters:
        dialogue (str): The text to be converted into speech.
        gender (str): The gender of the voice (either 'female' or 'male').
        output_audio_destination_path (str): The path where the generated audio will be saved.

    Raises:
        ValueError: If gender is neither 'female' nor 'male'.
        requests.HTTPError: If the API answers with an error status code.
    """

    CHUNK_SIZE = 1024

    # Elevenlabs voice ids, keyed by the supported genders
    voice_ids = {
        "female": "21m00Tcm4TlvDq8ikWAM",
        "male": "ODq5zmih8GrVes37Dizd",
    }
    if gender not in voice_ids:
        # Fail before any network traffic instead of calling the API with an empty voice id
        raise ValueError(f"Unsupported gender {gender!r}; expected 'female' or 'male'")
    voice_id = voice_ids[gender]

    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": os.getenv("ELEVENLABS_API_KEY"),
    }

    data = {
        "text": dialogue,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    # stream=True lets the body be written chunk by chunk; the timeout
    # (connect, read) keeps a stalled connection from hanging the pipeline forever.
    with requests.post(url, json=data, headers=headers, stream=True, timeout=(10, 300)) as response:
        # Without this check an error payload (e.g. 401 JSON) would be saved as if it were audio
        response.raise_for_status()
        with open('output.mp3', 'wb') as f:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:
                    f.write(chunk)

    # Copy the generated audio to the requested destination
    shutil.copy('output.mp3', output_audio_destination_path)


def person_generate(prompt, gender, output_path):
    """
    Generate an image from text using the DiffusionPipeline model and save it to the specified output path.

    A gender-specific quality prefix is prepended to the prompt before it is
    passed to the pipeline.

    Parameters:
        prompt (str): The text prompt for image generation.
        gender (str): The gender associated with the prompt ('male' or 'female').
        output_path (str): The path where the generated image will be saved.

    Raises:
        ValueError: If gender is neither 'male' nor 'female'.
    """

    prompt_additions = {
        'male': '4k, photorealistic, man, male, portrait, ultra realistic, high quality, face clearly visible, ray tracing, model ',
        'female': '4k, photorealistic, woman, female, portrait, ultra realistic, high quality, face clearly visible, ray tracing, model ',
    }
    if gender not in prompt_additions:
        # Validate before the model is loaded so a bad argument fails immediately
        raise ValueError(f"Unsupported gender {gender!r}; expected 'male' or 'female'")
    full_prompt = prompt_additions[gender] + prompt

    # Load the DiffusionPipeline model
    pipe = DiffusionPipeline.from_pretrained("SG161222/RealVisXL_V2.0", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
    pipe.to("cuda")

    # Generate the image from the enriched prompt (not the bare input prompt)
    image = pipe(prompt=full_prompt).images[0]
    image.save(output_path)

def update_prompt_yaml(output_path, prompt_text):
    """
    Write the prompt YAML configuration file with a new text prompt.

    The file at output_path is overwritten. The prompt is emitted as a
    double-quoted scalar with quotes, backslashes and control characters
    escaped, so arbitrary prompt text cannot break the YAML structure.

    Parameters:
        output_path (str): The path to the YAML configuration file to be written.
        prompt_text (str): The new text prompt to be added to the configuration file.
    """
    import json

    # A JSON string literal is also a valid YAML double-quoted scalar; for plain
    # text this yields exactly "<text>", and it escapes '"', '\\' and newlines otherwise.
    quoted_prompt = json.dumps(str(prompt_text), ensure_ascii=False)

    # Create yaml file
    yaml_content = f"""\
Prompt:
  base: ""
  path: "models/DreamBooth_LoRA/realisticVisionV51_v20Novae.safetensors"
  motion_module:
    - "models/Motion_Module/mm_sd_v14.ckpt"
    - "models/Motion_Module/mm_sd_v15.ckpt"

  seed:           5658137986800322009
  steps:          25
  guidance_scale: 7.5

  prompt:
    - {quoted_prompt}
  n_prompt:
    - "semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
"""

    # Write the content as UTF-8 so non-ASCII prompts do not depend on the platform default encoding
    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(yaml_content)


def extract_gender(text):
    """
    Find the first parenthesised gender marker, '(male)' or '(female)', in a text string.

    Parameters:
        text (str): The text to scan for a gender marker.

    Returns:
        str: 'male' or 'female' (without the parentheses), or None if no marker is present.
    """

    # The marker is matched case-sensitively and must include the parentheses
    found = re.search(r'\((male|female)\)', text)
    return found.group(1) if found else None

def extract_dialogue(text):
    """
    Extract and return the dialogue from a text string enclosed in '<dialogue>' tags.

    Only the first '<dialogue>...</dialogue>' pair is used. The dialogue may
    span several lines.

    Parameters:
        text (str): The text containing dialogue enclosed in '<dialogue>' tags.

    Returns:
        str: The extracted dialogue text or None if not found.
    """

    # re.DOTALL lets '.' match newlines so multi-line dialogue is not silently dropped
    dialogue_match = re.search(r'<dialogue>(.*?)</dialogue>', text, re.DOTALL)
    if dialogue_match:
        return dialogue_match.group(1)
    return None
    
def handle_animatediff(item):
    """
    Process an 'animatediff' item by updating the prompt YAML file and running a subprocess.

    Parameters:
        item (dict): A dictionary containing an 'animatediff' item with relevant information.
    """

    # Append quality keywords; the explicit ", " keeps the last word of the
    # item text from fusing with the first keyword (e.g. "traffic light4k")
    quality_suffix = "4k, photo realistic, ultra quality, realistic, beautiful, hyper realistic, ray tracing, reflections."
    prompt = item['animatediff'] + ", " + quality_suffix

    # One path shared by the writer and the launcher, so both agree on
    # case-sensitive filesystems
    config_rel_path = "configs/prompts/prompt.yaml"

    # Modify the prompt.yaml file
    update_prompt_yaml("AnimateDiff/" + config_rel_path, prompt)

    # Run the command through the shell: "cd" and "conda activate" only take effect there
    command = ["cd", "AnimateDiff", "&&", "conda", "activate", "animatediff", "&&", "python", "-m", "scripts.animate", "--config", config_rel_path]

    # The command is built from constants only; the prompt travels via the YAML file
    subprocess.run(" ".join(command), shell=True)

def handle_sadtalker(item):
    """
    Turn a 'sadtalker' item into a talking-head video: synthesise the spoken line,
    generate a portrait image, then launch the SadTalker inference script on both.

    Items whose text lacks a dialogue or a gender marker are skipped without any output.

    Parameters:
        item (dict): A dictionary whose 'sadtalker' entry holds the scene text.
    """

    description = item['sadtalker']
    spoken_text = extract_dialogue(description)
    voice_gender = extract_gender(description)

    # Nothing to do unless both the dialogue and the gender were found
    if not (spoken_text and voice_gender):
        return

    # Fixed input/output locations inside the SadTalker checkout
    # NOTE(review): the audio file is named .wav although voice_generation requests MPEG audio - confirm SadTalker accepts it
    driven_audio = "SadTalker/examples/driven_audio/audio.wav"
    source_image = "SadTalker/examples/source_image/image.png"
    result_dir = "SadTalker/examples/results"

    # Audio first, then the portrait; the full scene text is the image prompt
    voice_generation(spoken_text, voice_gender, driven_audio)
    person_generate(description, voice_gender, source_image)

    # Launch SadTalker with the interpreter from its own virtual environment
    sadtalker_args = [
        "SadTalker/venv/scripts/python.exe",
        "SadTalker/inference.py",
        "--driven_audio", driven_audio,
        "--source_image", source_image,
        "--result_dir", result_dir,
        "--still",
        "--preprocess", "full",
        "--enhancer", "gfpgan",
    ]
    subprocess.run(sadtalker_args)

def process_formatted_llm_lines(formatted_llm_lines):
    """
    Dispatch each formatted LLM item to the handler matching its key.

    An item containing 'animatediff' goes to handle_animatediff; otherwise an item
    containing 'sadtalker' goes to handle_sadtalker. Items with neither key are ignored.

    Parameters:
        formatted_llm_lines (list): A list of dictionaries containing formatted LLM items.
    """

    for entry in formatted_llm_lines:
        # 'animatediff' takes precedence when both keys are present
        if 'animatediff' in entry:
            handle_animatediff(entry)
            continue
        if 'sadtalker' in entry:
            handle_sadtalker(entry)


# Sample input: each dict has one key naming its handler ('animatediff' or 'sadtalker').
# 'sadtalker' text carries the spoken line inside <dialogue>...</dialogue> tags and the
# voice gender as '(male)' or '(female)'.
formatted_llm_lines = [{'animatediff': 'Imagine a dark and gritty street in the heart of a city, the sounds of traffic and people bustling by. The scene is lit by the neon lights of nearby businesses., car, person, traffic light'},
 {'sadtalker': 'John Wick, a legendary assassin, stands in the middle of the street, his eyes cold and his face impassive. He is wearing a black suit and holding a gun in his hand. He says, <dialogue>"I\'m going to kill them all"</dialogue> (male)., person, car, tie, umbrella'},
 {'animatediff': 'John Wick is a deadly assassin, known for his prowess in taking down his targets with precision and efficiency. He is a man on a mission, and he will stop at nothing to get what he wants. With his gun at the ready, he stalks the streets, searching for his next target., person, cell phone, tie'}]

# Run the pipeline over the sample items, in order
process_formatted_llm_lines(formatted_llm_lines)
