# Text to Standard Speech

* Convert text to speech
* Download the speech and store it locally

Reference: https://docs.play.ht/reference/api-generate-audio

In [30]:
import os
from pyht import Client
import requests
import re 
from dotenv import load_dotenv
import json
import time

# Load variables from a local .env file (python-dotenv) so that the
# PLAYHT_SECRET_KEY / PLAYHT_USER_ID values read through os.environ in the
# cells below are available. As the cell's last expression, the call's return
# value is what the notebook displays as this cell's output.
load_dotenv()
 
#print(os.environ['PLAYHT_USER_ID'])

True

In [31]:
# Text to standard speech
#
# Sends the story text to the PlayHT v2 TTS endpoint and keeps the raw HTTP
# response in `response`; the next cell parses the job information out of it.

# --- Inputs ------------------------------------------------------------------
story_text= "Hello! Welcome to my vlog of my recent trip to the Great Sand Dunes National Park!\n\nThe trip started with a 400 miles drive for 4 hours and ended with my favourite Indian food Dosa!"
# Alternative sample text: "Okay sure. I'm super excited to meet you!"
custom_voice_id= "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json"
voiceover_save_path= "../03. Intermediate Results//test.mp3"

# --- Configure PlayHT --------------------------------------------------------
url = "https://api.play.ht/api/v2/tts"

payload = {
    "text": story_text,
    "voice": custom_voice_id,
    "output_format": "mp3",
    "voice_engine": "PlayHT2.0",
    "quality": "medium",
    "seed": 7,
    "temperature": 1,
    # NOTE(review): the voice id above contains "female-cs" while the emotion
    # is a "male_*" preset -- confirm this combination is intended.
    "emotion": "male_happy",
    "style_guidance": 20
}

headers = {
    # Ask for a server-sent-event stream; the next cell looks for the
    # "event: completed" entry inside response.text.
    "accept": "text/event-stream",
    "content-type": "application/json",
    "AUTHORIZATION": os.environ['PLAYHT_SECRET_KEY'],
    "X-USER-ID": os.environ['PLAYHT_USER_ID']
}

# (connect, read) timeouts in seconds. Without a timeout a stalled connection
# would block the notebook forever.
request_timeout = (10, 120)

# --- Generate custom voiceover -----------------------------------------------
print("Generating Voiceover...")
response = requests.post(url, json=payload, headers=headers, timeout=request_timeout)

if response.status_code in (200, 201):
    print("Voiceover Generated")
else:
    print(response)

    # The error body is not guaranteed to be JSON, nor to carry an
    # 'error_message' key; fall back to the raw text so that reporting the
    # problem can never mask the HTTP error raised below.
    try:
        error_body = response.json()
    except ValueError:
        error_body = None

    if isinstance(error_body, dict) and 'error_message' in error_body:
        print(error_body['error_message'])
    else:
        print(response.text)

    # Raises requests.HTTPError for 4xx/5xx responses.
    response.raise_for_status()
    # Reached only for a non-error status other than 200/201: stop here rather
    # than let the following cells fail on an unexpected response body.
    raise RuntimeError(f"Unexpected status code {response.status_code} from the TTS endpoint")

Generating Voiceover...
Voiceover Generated


In [32]:
# Extract voiceover job information
#
# `response` (from the previous cell) holds an event stream as text. The JSON
# describing the finished job is the data that follows the "completed" event.

str_find= 'event: completed\r\ndata: '
str_find_index= response.text.find(str_find)
if str_find_index == -1:
    # str.find() returns -1 when the marker is absent; slicing with that value
    # would silently start at the wrong offset and fail later with a
    # misleading JSON error, so stop with a clear message instead.
    raise ValueError("No 'event: completed' entry found in the TTS response; cannot extract the voiceover job information.")

# Everything after the marker, with line breaks removed, is parsed as JSON.
voiceover_job_info= json.loads(response.text[str_find_index+len(str_find):].replace("\n",'').replace("\r",''))

# Download voiceover
response_url=  voiceover_job_info['url']
print("Extracted URL:", response_url)

try:
    # (connect, read) timeouts in seconds; a timeout surfaces as a
    # requests.RequestException and is reported by the handler below.
    response_audio = requests.get(response_url, timeout=(10, 120))
    if response_audio.status_code == 200:
        file_path = voiceover_save_path 
        
        # Binary mode: the body is the audio file itself.
        with open(file_path, 'wb') as f:
            f.write(response_audio.content)
        
        print(f"Audio file downloaded successfully and saved in '{file_path}'.")
    else:
        print(f"Failed to download the audio file. Status code: {response_audio.status_code}")
except requests.RequestException as e:
    print(f"Error downloading the audio file: {e}")

Extracted URL: https://peregrine-results.s3.amazonaws.com/pigeon/TJcA2O6GcHWaikPQNJ_0.mp3
Audio file downloaded successfully and saved in '../03. Intermediate Outputs//test.mp3'.


# Generate transcript with timestamps

Reference: https://docs.play.ht/reference/api-transcribe-audio

In [44]:
# Generate timestamp and transcripts
#
# 1. Ask PlayHT to transcribe the voiceover produced by the TTS job.
# 2. Poll the transcription endpoint until it answers with 200/201.
# 3. Print every transcript segment with its start/end time.

# (connect, read) timeouts in seconds applied to every request in this cell.
request_timeout = (10, 60)
# Polling budget: max_attempts * poll_interval_secs bounds the total wait, so
# a permanent failure (e.g. bad credentials) cannot loop forever.
max_attempts = 40
poll_interval_secs = 3

# Create timestamp job
print("Creating Transcript Job...")
response_job_id= voiceover_job_info['id']

url = "https://api.play.ht/api/v2/transcriptions"

payload = {
    "tts_job_id": response_job_id,
    "format": "JSON",
    "timestamp_level": "SENTENCE"
}

headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "AUTHORIZATION": os.environ['PLAYHT_SECRET_KEY'],
    "X-USER-ID": os.environ['PLAYHT_USER_ID']
}

response_timestamp_job = requests.post(url, json=payload, headers=headers, timeout=request_timeout)
if response_timestamp_job.ok:
    print("Transcript Job Created")
else:
    # Deliberately not fatal: the polling below decides whether a transcript
    # is available. Report the status instead of claiming success.
    print(f"Transcript job request returned status code {response_timestamp_job.status_code}")


# Collect transcript
print("\nDownloading Transcript...")
url = f"https://api.play.ht/api/v2/transcriptions/{response_job_id}"

headers = {
    "accept": "application/json",
    "AUTHORIZATION": os.environ['PLAYHT_SECRET_KEY'],
    "X-USER-ID": os.environ['PLAYHT_USER_ID']
}

# Wait for the job to complete
for attempt_id in range(1, max_attempts + 1):
    print(f"    Attempt {attempt_id}")
    response_timestamp = requests.get(url, headers=headers, timeout=request_timeout)
    if response_timestamp.status_code in (200, 201):
        break
    if attempt_id < max_attempts:
        time.sleep(poll_interval_secs)
else:
    # The loop ended without a break: no attempt returned 200/201.
    raise TimeoutError(
        f"Transcript not available after {max_attempts} attempts "
        f"(last status code: {response_timestamp.status_code})"
    )

print("Transcript Downloaded")
print("\nTranscript with timestamps (secs):")
transcription_segments= json.loads(response_timestamp.text)['transcription']['segments']

for segment in transcription_segments:
    print(segment['id'],':',segment['start'],"->", segment['end'],":", segment['text'])

Creating Transcript Job...
Transcript Job Created

Downloading Transcript...
    Attempt 1
Transcript Downloaded

\Transcript with timestamps (secs):
0 : 0 -> 4.18 :  Hello! Welcome to my vlog of my recent trip to the Great Sandines National Park.
1 : 4.82 -> 9.84 :  The trip started with a 400 miles drive for 4 hours and ended with my favorite Indian food dosa.
