# Create Audio Files for YouTube Videos

In [4]:
import os
import random
import subprocess

import docx2txt
import pandas as pd
from edge_tts import VoicesManager

In [5]:
def need_audio(df_loc):
    """Return the rows of the tracking workbook that still lack an audio file.

    The workbook at ``df_loc`` is read with ``pandas.read_excel``. A row is
    kept when its ``Script_Created`` value is "Yes" and its
    ``AI_Voice_Generated`` value is "No".
    """
    tracker = pd.read_excel(df_loc)

    # Build each condition separately so the filter reads as plain English.
    script_ready = tracker["Script_Created"] == "Yes"
    voice_missing = tracker["AI_Voice_Generated"] == "No"

    return tracker[script_ready & voice_missing]

In [6]:
def text_to_speech(text_loc, voice, figure, intro, outro, save_loc, speed="+0%",):
    """Synthesize a figure's narration to an audio file with the edge-tts CLI.

    The narration is the intro document (with the "[Historical Figure]"
    placeholder replaced by ``figure``), followed by the script and, when
    ``outro`` is given, the outro document, each separated by two spaces.

    Args:
        text_loc: Path to the .docx file holding the script.
        voice: Voice name passed to ``edge-tts --voice``.
        figure: Name substituted for "[Historical Figure]" in the intro.
        intro: Path to the .docx file holding the intro.
        outro: Path to the .docx file holding the outro; a falsy value
            skips the outro.
        save_loc: Output path passed to ``edge-tts --write-media``.
        speed: Rate adjustment passed to ``edge-tts --rate`` (e.g. "+0%").

    Raises:
        subprocess.CalledProcessError: If edge-tts exits with a non-zero
            status, so callers do not record a failed synthesis as done.
        FileNotFoundError: If the ``edge-tts`` executable cannot be found.
    """
    # Get the script and flatten line breaks into spaces
    data = docx2txt.process(text_loc)
    data = data.replace('\r', ' ').replace('\n\n', ' ').replace('\n', ' ')

    # Get the intro and personalise it for this figure
    intro = docx2txt.process(intro)
    intro = intro.replace("[Historical Figure]", figure)

    if outro:
        outro = docx2txt.process(outro)
        # Combine the three
        data = intro + "  " + data + "  " + outro
    else:
        data = intro + "  " + data

    # Double quotes were swapped out to survive shell quoting; kept so the
    # text handed to the synthesizer is unchanged from earlier runs.
    data = data.replace('"', "'")

    # Pass the arguments as a list (no shell) so characters such as $, ` or \
    # in the script are spoken literally instead of being interpreted.
    # --rate uses the "=" form because values like "-10%" start with a dash.
    command = [
        "edge-tts",
        "--voice", voice,
        "--text", data,
        f"--rate={speed}",
        "--write-media", save_loc,
    ]
    subprocess.run(command, check=True)

In [9]:
async def create_audio(figure, script_loc, audio_loc, df_loc, intro, outro, speed="+0%"):
    """Generate the audio file for one figure and mark it done in the workbook.

    A random en-US voice (excluding 'en-US-AnaNeural') is chosen, the script
    at ``script_loc + figure + ".docx"`` is synthesized to
    ``audio_loc + figure + ".mp3"``, and the row in the workbook at ``df_loc``
    whose ``Name`` equals ``figure`` gets ``AI_Voice_Generated`` set to "Yes".

    Args:
        figure: Name of the figure; used for file names and the workbook row.
        script_loc: Prefix (including any trailing separator) of the script
            path; the paths are built by plain string concatenation.
        audio_loc: Prefix (including any trailing separator) of the audio path.
        df_loc: Path of the Excel workbook to update.
        intro: Path to the intro .docx, forwarded to ``text_to_speech``.
        outro: Path to the outro .docx (or a falsy value), forwarded to
            ``text_to_speech``.
        speed: Rate adjustment forwarded to ``text_to_speech``.
    """
    # Fetch the available voices and keep only the English US ones
    voices = await VoicesManager.create()
    voice = voices.find(Language="en", Locale="en-US")
    # Filter out annoying voice
    voice = [v for v in voice if v['ShortName'] != 'en-US-AnaNeural']

    # Choose random voice out of those remaining
    voice_chosen = random.choice(voice)["ShortName"]

    # Location of the script
    text_loc = script_loc + figure + ".docx"

    # Location to save the audio
    save_loc = audio_loc + figure + ".mp3"

    # Run the text_to_speech function, writing to the per-figure file
    # (save_loc) rather than to the bare audio_loc prefix.
    text_to_speech(text_loc, voice_chosen, figure, intro, outro, save_loc, speed)

    df = pd.read_excel(df_loc)

    # Convert the No to Yes in the AI_Voice_Generated column for this figure
    df.loc[(df.Name == figure), "AI_Voice_Generated"] = "Yes"

    # Overwrite the Excel file with the updated status
    df.to_excel(df_loc, index=False)