# Create Audio Files for YouTube Videos

In [53]:
import docx2txt
import os
import random

import pandas as pd

from edge_tts import VoicesManager

In [54]:
# Folders holding the per-figure scripts (full-length videos and Shorts)
script_loc = "/Volumes/BCross/Documents/YouTube/Scripts/"
short_script_loc = "/Volumes/BCross/Documents/YouTube/Short Scripts/"

# Folder holding the reusable intro / description / outro documents
set_scripts_loc = "/Volumes/BCross/Documents/YouTube/Set Scripts/"

# Folders the generated narration is written to
audio_loc = "/Volumes/BCross/Documents/YouTube/Audio/"
short_audio_loc = "/Volumes/BCross/Documents/YouTube/Short Audio/"

# Read the text of the three set documents: intro, description, outro
intro = docx2txt.process(os.path.join(set_scripts_loc, 'Intro.docx'))
description = docx2txt.process(os.path.join(set_scripts_loc, 'Description.docx'))
outro = docx2txt.process(os.path.join(set_scripts_loc, 'Outro.docx'))

In [55]:
# Tracking workbooks (one for full-length videos, one for Shorts); the later
# cells filter these on Script_Created / AI_Voice_Generated and write them back.
historical_figures_list = pd.read_excel(io=r'../Historical Figures List.xlsx')
short_historical_figures_list = pd.read_excel(io=r'../Short Historical Figures List.xlsx')

In [56]:
def generate_audio(text_loc, voice, figure, speed="+0%", intro="/Volumes/BCross/Documents/YouTube/Set Scripts/Intro.docx",
                   outro="/Volumes/BCross/Documents/YouTube/Set Scripts/Outro.docx",
                   save_loc="/Volumes/BCross/Documents/YouTube/Audio/"):
    """Build the narration text for one figure and render it with the edge-tts CLI.

    The script at ``text_loc`` is flattened onto a single line, prefixed with the
    intro (with its "[Historical Figure]" placeholder replaced by ``figure``) and,
    when ``outro`` is truthy, followed by the outro.

    Parameters
    ----------
    text_loc : str
        Path to the figure's .docx script.
    voice : str
        Value passed to edge-tts ``--voice``.
    figure : str
        Name substituted for "[Historical Figure]" in the intro text.
    speed : str
        Value passed to edge-tts ``--rate`` (e.g. "+0%", "+25%").
    intro : str
        Path to the intro .docx.
    outro : str or None
        Path to the outro .docx; a falsy value omits the outro.
    save_loc : str
        Value passed to edge-tts ``--write-media`` (the output audio path).

    Raises
    ------
    subprocess.CalledProcessError
        If edge-tts exits with a non-zero status.
    FileNotFoundError
        If the ``edge-tts`` executable cannot be found.
    """
    # Imported locally so this cell does not depend on a change to the import cell.
    import subprocess

    # Get the script and collapse carriage returns / line breaks into spaces
    data = docx2txt.process(text_loc)
    data = data.replace('\r', ' ').replace('\n\n', ' ').replace('\n', ' ')

    # Get the intro and personalise it for this figure
    intro = docx2txt.process(intro)
    intro = intro.replace("[Historical Figure]", figure)

    if outro:
        outro = docx2txt.process(outro)
        # Combine the three
        data = intro + "  " + data + "  " + outro
    else:
        data = intro + "  " + data

    # Kept so the text handed to edge-tts is the same as before; it is no longer
    # needed for quoting because the command is not passed through a shell.
    data = data.replace('"', "'")

    # Pass the arguments as a list instead of a shell string: characters such as
    # $, ` or \ in the script can no longer be interpreted by the shell. The
    # --opt=value form keeps values that start with "-" from being read as options.
    command1 = [
        "edge-tts",
        "--voice", voice,
        f"--text={data}",
        f"--rate={speed}",
        "--write-media", save_loc,
    ]
    # check=True surfaces a failed synthesis instead of silently ignoring it
    subprocess.run(command1, check=True)

## Create Audio for Normal Videos

In [57]:
# Select the figures whose script exists but whose AI voice has not been generated.
script_ready = historical_figures_list["Script_Created"].eq("Yes")
voice_missing = historical_figures_list["AI_Voice_Generated"].eq("No")
need_audio = historical_figures_list[script_ready & voice_missing]
need_audio

Unnamed: 0,Figure_ID,Name,Description,Script_Created,AI_Voice_Generated,Images_Obtained,Youtube_Video_Created,Youtube_Video_Posted,Youtube_URL,Word_Count,Model_Used,Wikipedia


In [58]:
# We only do this if audio is needed
if need_audio.shape[0] > 0:
    # Randomly choose an English US voice
    voices = await VoicesManager.create()
    # Get the list of english US voices
    voice = voices.find(Language="en", Locale="en-US")
    # Filter out annoying voice
    voice = list(filter(lambda v: v['ShortName'] != 'en-US-AnaNeural', voice))
    
    # Create figure filepaths
    figure_scripts = list(need_audio.iloc[:,1].values + '.docx')
    
    # Now want to loop through the figures, getting the script, the figures name and then
    # saving the audio before changing the value in the Excel file for needs audio to yes.
    for figure in figure_scripts:
        
        # Choose random voice out of those remaining
        voice_chosen = random.choice(voice)["ShortName"]
        
        # Get the figure name
        fig = os.path.splitext(figure)[0]
        
        # Location of the scripts
        text_loc = script_loc + figure
        
        # Location to save the audio
        save_loc = audio_loc + fig + ".mp3"
        
        # Run the generate_audio function
        generate_audio(text_loc, voice_chosen, fig, save_loc = save_loc)
        
        # Convert the No to Yes in needs audio column
        historical_figures_list.loc[(historical_figures_list.Name == fig), "AI_Voice_Generated"] = "Yes"
    
        # Once all done stop the engine and then overwrite the Excel file
        historical_figures_list.to_excel(r'../Historical Figures List.xlsx', index=False)
    
    print("Complete, my guy!")

## Create Audio for YouTube Shorts

In [59]:
# Select the Shorts figures whose script exists but whose AI voice has not been generated.
short_script_ready = short_historical_figures_list["Script_Created"].eq("Yes")
short_voice_missing = short_historical_figures_list["AI_Voice_Generated"].eq("No")
short_need_audio = short_historical_figures_list[short_script_ready & short_voice_missing]
short_need_audio

Unnamed: 0,Figure_ID,Name,Description,Script_Created,AI_Voice_Generated,Images_Obtained,Youtube_Video_Created,Youtube_Video_Posted,Youtube_URL,Short_Youtube_URL,Word_Count,Model_Used,Wikipedia
1,2,Marie Antoinette,Queen consort of France during the French Revo...,Yes,No,No,No,No,https://www.youtube.com/watch?v=2--5u7WkEus,,221,gpt-4o,https://en.wikipedia.org/wiki/Marie_Antoinette


In [60]:
# We only do this if audio is needed
if short_need_audio.shape[0] > 0:
    # Randomly choose an English US voice
    voices = await VoicesManager.create()
    # Get the list of english US voices
    voice = voices.find(Language="en", Locale="en-US")
    # Filter out annoying voice
    voice = list(filter(lambda v: v['ShortName'] != 'en-US-AnaNeural', voice))
    
    # Create figure filepaths
    figure_scripts = list(short_need_audio.iloc[:, 1].values + '.docx')
    
    # Now want to loop through the figures, getting the script, the figures name and then
    # saving the audio before changing the value in the Excel file for needs audio to yes.
    for figure in figure_scripts:
        
        # Choose random voice out of those remaining
        voice_chosen = random.choice(voice)["ShortName"]
        
        # Get the figure name
        fig = os.path.splitext(figure)[0]
        
        # Location of the scripts
        text_loc = short_script_loc + figure
        
        # Location to save the audio
        save_loc = short_audio_loc + fig + ".mp3"
        
        # Run the generate_audio function
        generate_audio(text_loc, voice_chosen, fig, speed="+25%", save_loc=save_loc, outro=None)
        
        # Convert the No to Yes in needs audio column
        short_historical_figures_list.loc[(short_historical_figures_list.Name == fig), "AI_Voice_Generated"] = "Yes"
    
        # Once all done stop the engine and then overwrite the Excel file
        short_historical_figures_list.to_excel(r'../Short Historical Figures List.xlsx', index=False)
    
    print("Complete, my guy!")

Complete, my guy!


WEBVTT

00:00:00.090 --> 00:00:03.410
Welcome to Figures in History In this episode we will

00:00:03.420 --> 00:00:07.220
be discussing Marie Antoinette 1 Marie Antoinette was born on

00:00:07.230 --> 00:00:12.560
2 November 1755 in Vienna Austria as Maria Antonia Josepha Johanna the

00:00:12.570 --> 00:00:15.480
youngest daughter of Empress Maria Theresa and Emperor Francis I

00:00:15.630 --> 00:00:20.040
2 At age 14 she married Louis XVI Dauphin of France

00:00:20.280 --> 00:00:24.640
in May 1770 and became the Dauphine of France 3 Marie

00:00:24.650 --> 00:00:28.700
Antoinette became Queen of France on 10 May 1774 when her husband

00:00:28.740 --> 00:00:32.860
ascended the throne as Louis XVI 4 She was falsely accused

00:00:32.870 --> 00:00:34.870
of defrauding the Crown's jewelers in the Affair of the

00:00:34.880 --> 00:00:39.240
Diamond Necklace which severely damaged her reputation 5 Known as

00:00:39.250 --> 00:00:42.320
Madame Déficit she was blamed for France's fina