# **Ejemplo de peticiones sin librerías a play.ai (ir a https://play.ai/developers) para conseguir el API_KEY y el USER_ID**




In [None]:
import requests
import os

# Set up headers with your API authentication token and user ID

api_key = userdata.get("PLAY_API_KEY")
user_id = userdata.get("PLAY_USER_ID")
headers = {
    'Authorization': f'Bearer {api_key}',
    'Content-Type': 'application/json',
    'X-USER-ID': user_id
}


# JSON payload containing the script and configuration settings

json_data = {
    'model': 'PlayDialog',
    'text': "All human wisdom is summed up in these two words: Wait and hope.",
    'voice': 's3://voice-cloning-zero-shot/b0aca4d7-1738-4848-a80b-307ac44a7298/original/manifest.json',
    'outputFormat': 'wav'
}

# Send the POST request to the PlayDialog API endpoint
response = requests.post('https://api.play.ai/api/v1/tts/stream', headers=headers, json=json_data)

# Handle response and save audio file
if response.status_code == 200:
    with open('dialogue.wav', 'wb') as f:
        f.write(response.content)
    print("Audio file saved as dialogue.wav")
else:
    print(f"Request failed with status code {response.status_code}: {response.text}")


Audio file saved as dialogue.wav


In [None]:
# Primero instalar las dependencias necesarias
!pip install elevenlabs openai python-dotenv ipywidgets

import ipywidgets as widgets
from IPython.display import display, Audio, clear_output
from elevenlabs import save, ElevenLabs
from openai import OpenAI
import os
from datetime import datetime
import threading

class VoiceInterface:
    def __init__(self, elevenlabs_api_key, openai_api_key):
        self.client = ElevenLabs(api_key=elevenlabs_api_key)
        self.openai_client = OpenAI(api_key=openai_api_key)
        self.voices = self.get_voices()
        self.lista_nombres = {v['name']: k for k, v in self.voices.items()}
        self.setup_ui()

    def get_voices(self):
        try:
            response = self.client.voices.get_all()
            voces = {}
            for voice in response.voices:
                accent = voice.labels.get('accent', 'sin acento')
                voces[voice.voice_id] = {
                    'name': voice.name,
                    'accent': accent
                }
            return voces
        except Exception as e:
            print(f"Error obteniendo las voces: {str(e)}")
            return {}

    def get_dialogo(self, indicaciones, num_frases, ids):
        num_personas = len(ids)
        nombres = ", ".join(self.voices[id]['name'] for id in ids)

        prompt = f"""Generar un dialogo en inglés de {num_frases} frases entre {num_personas} personas,
                    que sus nombres son {nombres}. Cada frase tiene al principio el nombre de la persona
                    que interviene seguido de una almohadilla(#) y la frase de esa persona.
                    Teniendo en cuenta esto: {indicaciones}. La salida sólo debe incluir las frases de
                    las personas empezando por su nombre, y ningun otro texto.
                    Ejemplo: Juan# Hola, como estas? Maria# Bien, gracias."""

        completion = self.openai_client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "Eres un experto en idiomas."},
                {"role": "user", "content": prompt}
            ]
        )
        return completion.choices[0].message.content

    def setup_ui(self):
        # Crear widgets
        self.prompt_area = widgets.Textarea(
            description='Prompt:',
            placeholder='Introduce tu prompt aquí...',
            layout={'width': '800px', 'height': '100px'}
        )

        self.num_frases = widgets.IntText(
            value=10,
            description='Nº frases:',
            min=1,
            max=100
        )

        self.dialogo_area = widgets.Textarea(
            description='Diálogo:',
            placeholder='El diálogo aparecerá aquí...',
            layout={'width': '800px', 'height': '200px'}
        )

        # Crear checkboxes para las voces
        self.voice_checks = []
        voice_box = widgets.VBox()
        for voice_id, voice_data in self.voices.items():
            checkbox = widgets.Checkbox(
                description=f"{voice_data['name']} ({voice_data['accent']})",
                value=False
            )
            self.voice_checks.append((voice_id, checkbox))
            voice_box.children = (*voice_box.children, checkbox)

        # Botones
        self.generar_dialogo_btn = widgets.Button(description='Generar diálogo')
        self.generar_audio_btn = widgets.Button(description='Generar audio')

        # Eventos de botones
        self.generar_dialogo_btn.on_click(self.on_generar_dialogo_click)
        self.generar_audio_btn.on_click(self.on_generar_audio_click)

        # Mostrar widgets
        display(self.prompt_area)
        display(self.num_frases)
        display(voice_box)
        display(self.generar_dialogo_btn)
        display(self.dialogo_area)
        display(self.generar_audio_btn)

    def on_generar_dialogo_click(self, b):
        selected_voices = [vid for vid, check in self.voice_checks if check.value]
        if not selected_voices:
            print("Selecciona al menos una voz")
            return

        dialogo = self.get_dialogo(
            self.prompt_area.value,
            self.num_frases.value,
            selected_voices
        )
        self.dialogo_area.value = dialogo

    def concatenate_audios(self, generated_files, output_path="output_combined.mp3"):
        with open(output_path, "wb") as outfile:
            for filename in generated_files:
                with open(filename, "rb") as infile:
                    outfile.write(infile.read())

    def on_generar_audio_click(self, b):
        if not self.dialogo_area.value:
            print("Primero genera un diálogo")
            return

        output_dir = "generated_audios"
        os.makedirs(output_dir, exist_ok=True)

        rutas_ficheros = []
        frases = self.dialogo_area.value.split("\n")

        for pos, frase in enumerate(frases):
            if not frase.strip():
                continue

            nombre, texto = frase.split("#")
            nombre = nombre.strip()
            voice_id = self.lista_nombres[nombre]

            audio_content = self.client.generate(
                text=texto,
                voice=voice_id,
                model="eleven_multilingual_v2"
            )

            ruta = os.path.join(output_dir, f"{nombre}{pos}.mp3")
            save(audio_content, ruta)
            rutas_ficheros.append(ruta)

        combined_path = "output_combined.mp3"
        self.concatenate_audios(rutas_ficheros, combined_path)

        # Mostrar el audio combinado
        display(Audio(combined_path))

In [20]:
from google.colab import userdata

interface = VoiceInterface(
    elevenlabs_api_key=userdata.get('ELEVENLABS_API_KEY'),
    openai_api_key=userdata.get('OPENAI_API_KEY')
)

Textarea(value='', description='Prompt:', layout=Layout(height='100px', width='800px'), placeholder='Introduce…

IntText(value=10, description='Nº frases:')

VBox(children=(Checkbox(value=False, description='Aria (American)'), Checkbox(value=False, description='Roger …

Button(description='Generar diálogo', style=ButtonStyle())

Textarea(value='', description='Diálogo:', layout=Layout(height='200px', width='800px'), placeholder='El diálo…

Button(description='Generar audio', style=ButtonStyle())