# Create Audio Files for YouTube Videos

In [23]:
import os
import random
import subprocess

import docx2txt
import pandas as pd
from edge_tts import VoicesManager

In [24]:
# Folders holding the per-figure .docx scripts (full-length videos and Shorts)
script_loc = "/Volumes/BCross/Documents/YouTube/Scripts/"
short_script_loc = "/Volumes/BCross/Documents/YouTube/Short Scripts/"

# Folder holding the shared Intro / Description / Outro documents
set_scripts_loc = "/Volumes/BCross/Documents/YouTube/Set Scripts/"

# Folders the generated audio is written to
audio_loc = "/Volumes/BCross/Documents/YouTube/Audio/"
short_audio_loc = "/Volumes/BCross/Documents/YouTube/Short Audio/"

# Extract the text of the three shared documents, in the order Intro, Description, Outro
intro, description, outro = (
    docx2txt.process(set_scripts_loc + doc_name)
    for doc_name in ('Intro.docx', 'Description.docx', 'Outro.docx')
)

In [25]:
# Load the two tracking workbooks: full-length videos first, then Shorts
historical_figures_list, short_historical_figures_list = map(
    pd.read_excel,
    (r'../Historical Figures List.xlsx', r'../Short Historical Figures List.xlsx'),
)

In [41]:
def generate_audio(text_loc, voice, figure, speed="+0%", intro="/Volumes/BCross/Documents/YouTube/Set Scripts/Intro.docx",
                   outro="/Volumes/BCross/Documents/YouTube/Set Scripts/Outro.docx",
                   save_loc="/Volumes/BCross/Documents/YouTube/Audio/"):
    """Narrate a figure's script with the ``edge-tts`` command-line tool.

    The narrated text is the intro (with the ``[Historical Figure]``
    placeholder replaced by ``figure``), the script, and the outro, joined
    by two spaces. Double quotes in the final text are replaced by single
    quotes.

    Parameters
    ----------
    text_loc : str
        Path of the .docx script to narrate.
    voice : str
        Voice name passed to ``edge-tts --voice``.
    figure : str
        Name substituted for ``[Historical Figure]`` in the intro.
    speed : str
        Rate adjustment passed to ``edge-tts --rate`` (e.g. ``"+25%"``).
    intro, outro : str or None
        Paths of the .docx intro / outro. A falsy value omits that section.
    save_loc : str
        Value passed to ``edge-tts --write-media``.

    Returns
    -------
    int
        Exit status of the ``edge-tts`` process (0 conventionally means
        success), so callers can detect a failed generation.

    Raises
    ------
    OSError
        If the ``edge-tts`` executable cannot be launched.
    """
    # Read the script and flatten it onto a single line
    script_text = docx2txt.process(text_loc)
    script_text = script_text.replace('\r', ' ').replace('\n\n', ' ').replace('\n', ' ')

    # Assemble intro / script / outro, skipping any section that was not supplied
    sections = []
    if intro:
        sections.append(docx2txt.process(intro).replace("[Historical Figure]", figure))
    sections.append(script_text)
    if outro:
        sections.append(docx2txt.process(outro))
    narration = "  ".join(sections)

    # Kept from the shell-string version so the text sent to the TTS engine is unchanged
    narration = narration.replace('"', "'")

    # Pass an argument list rather than a shell string: characters such as
    # `$`, backticks or backslashes in a script are then spoken literally
    # instead of being expanded or executed by the shell.
    command = [
        "edge-tts",
        "--voice", voice,
        "--text", narration,
        f"--rate={speed}",  # "=" form so a negative rate such as "-10%" is not parsed as an option
        "--write-media", save_loc,
    ]
    completed = subprocess.run(command)
    return completed.returncode

## Create Audio for Normal Videos

In [45]:
# Here we want to capture any figures who need audio. We can create this in one loop.
# Figures that already have a script but no generated narration yet
need_audio = historical_figures_list.loc[
    historical_figures_list["Script_Created"].eq("Yes")
    & historical_figures_list["AI_Voice_Generated"].eq("No")
]
need_audio

Unnamed: 0,Figure_ID,Name,Description,Script_Created,AI_Voice_Generated,Images_Obtained,Youtube_Video_Created,Youtube_Video_Posted,Youtube_URL,Word_Count,Model_Used,Wikipedia


In [43]:
# We only do this if audio is needed
if need_audio.shape[0] > 0:
    # Randomly choose an English US voice
    voices = await VoicesManager.create()
    # Get the list of english US voices
    voice = voices.find(Language="en", Locale="en-US")
    # Filter out annoying voice
    voice = list(filter(lambda v: v['ShortName'] != 'en-US-AnaNeural', voice))
    
    # Create figure filepaths
    figure_scripts = list(need_audio.iloc[:,1].values + '.docx')
    
    # Now want to loop through the figures, getting the script, the figures name and then
    # saving the audio before changing the value in the Excel file for needs audio to yes.
    for figure in figure_scripts:
        
        # Choose random voice out of those remaining
        voice_chosen = random.choice(voice)["ShortName"]
        
        # Get the figure name
        fig = os.path.splitext(figure)[0]
        
        # Location of the scripts
        text_loc = script_loc + figure
        
        # Location to save the audio
        save_loc = audio_loc + fig + ".mp3"
        
        # Run the generate_audio function
        generate_audio(text_loc, voice_chosen, fig, save_loc = save_loc)
        
        # Convert the No to Yes in needs audio column
        historical_figures_list.loc[(historical_figures_list.Name == fig), "AI_Voice_Generated"] = "Yes"
    
        # Once all done stop the engine and then overwrite the Excel file
        historical_figures_list.to_excel(r'../Historical Figures List.xlsx', index=False)
    
    print("Complete, my guy!")

Complete, my guy!


WEBVTT

00:00:00.100 --> 00:00:04.025
Welcome to Figures in History In this episode we will

00:00:04.037 --> 00:00:10.213
be discussing Ivan Lendl 1 Ivan Lendl born on March 7, 1960

00:00:10.338 --> 00:00:15.188
in Ostrava Czechoslovakia emerged from a tennis-centric family with both

00:00:15.200 --> 00:00:19.387
parents being top players in their country His mother Olga

00:00:19.400 --> 00:00:23.262
Jeništová was notably ranked as the No. 2 female player in

00:00:23.275 --> 00:00:28.212
Czechoslovakia Lendl's early exposure to tennis set the stage for

00:00:28.225 --> 00:00:32.212
his illustrious career As a junior he quickly made a

00:00:32.225 --> 00:00:35.200
name for himself by winning the boys' singles titles at

00:00:35.212 --> 00:00:39.375
both the French Open and Wimbledon in 1978 earning the world

00:00:39.388 --> 00:00:44.575
No. 1 junior ranking Turning professional the same year Lendl's ascent

00:00:44.587 --> 00:00:48.987
in the tennis world was rapid By 1980 he

## Create Audio for YouTube Shorts

In [44]:
# Here we want to capture any figures who need audio. We can create this in one loop.
# Shorts that already have a script but no generated narration yet
short_need_audio = short_historical_figures_list.loc[
    short_historical_figures_list["Script_Created"].eq("Yes")
    & short_historical_figures_list["AI_Voice_Generated"].eq("No")
]
short_need_audio

Unnamed: 0,Figure_ID,Name,Description,Script_Created,AI_Voice_Generated,Images_Obtained,Youtube_Video_Created,Youtube_Video_Posted,Youtube_short_URL,Youtube_URL,Word_Count,Model_Used,Wikipedia


In [39]:
# We only do this if audio is needed
if short_need_audio.shape[0] > 0:
    # Randomly choose an English US voice
    voices = await VoicesManager.create()
    # Get the list of english US voices
    voice = voices.find(Language="en", Locale="en-US")
    # Filter out annoying voice
    voice = list(filter(lambda v: v['ShortName'] != 'en-US-AnaNeural', voice))
    
    # Create figure filepaths
    figure_scripts = list(short_need_audio.iloc[:, 1].values + '.docx')
    
    # Now want to loop through the figures, getting the script, the figures name and then
    # saving the audio before changing the value in the Excel file for needs audio to yes.
    for figure in figure_scripts:
        
        # Choose random voice out of those remaining
        voice_chosen = random.choice(voice)["ShortName"]
        
        # Get the figure name
        fig = os.path.splitext(figure)[0]
        
        # Location of the scripts
        text_loc = short_script_loc + figure
        
        # Location to save the audio
        save_loc = short_audio_loc + fig + ".mp3"
        
        # Run the generate_audio function
        generate_audio(text_loc, voice_chosen, fig, speed="+25%", save_loc=save_loc, outro=None)
        
        # Convert the No to Yes in needs audio column
        short_historical_figures_list.loc[(short_historical_figures_list.Name == fig), "AI_Voice_Generated"] = "Yes"
    
        # Once all done stop the engine and then overwrite the Excel file
        short_historical_figures_list.to_excel(r'../Short Historical Figures List.xlsx', index=False)
    
    print("Complete, my guy!")

Complete, my guy!


WEBVTT

00:00:00.080 --> 00:00:02.930
Welcome to Figures in History In this episode we will

00:00:02.940 --> 00:00:06.490
be discussing Ivan Lendl 1 Ivan Lendl was ranked world

00:00:06.500 --> 00:00:10.430
No. 1 in singles for a then-record 270 weeks 2 He

00:00:10.440 --> 00:00:12.750
won eight major singles titles and was the first man

00:00:12.760 --> 00:00:16.690
to contest 19 major finals 3 Lendl contested a record

00:00:16.770 --> 00:00:20.220
eight consecutive US Open finals 4 He is the only

00:00:20.230 --> 00:00:22.980
man in professional tennis history to have a match-winning percentage

00:00:22.990 --> 00:00:27.070
of over 90% in five different years 5 Lendl pioneered

00:00:27.080 --> 00:00:30.070
the aggressive baseline power tennis style earning him the nickname

00:00:30.090 --> 00:00:33.470
Father Of Modern Tennis 6 He helped Andy Murray win

00:00:33.480 --> 00:00:36.110
three major titles and reach the world No. 1 ranking as

00:00:36.120 --> 00:00:39.820
a coa