### Necessary imports

In [None]:
import os
import torch
import spacy
import whisper
import requests
from pathlib import Path
from colorama import Fore, Style
from urllib.parse import urlparse, unquote

### Check presence of the ffmpeg environment variable and the binary

In [None]:
def get_ffmpeg_directory():
    """Return the directory that provides ffmpeg, or ``None`` if none is found.

    The ``PATH`` environment variable is searched first for an entry that ends
    with ``ffmpeg/bin`` (forward or backward slashes; trailing separators,
    surrounding whitespace and double quotes are ignored). If no such entry
    exists, the directory containing the ``ffmpeg`` executable located by
    ``shutil.which`` is returned instead, which covers typical Linux/macOS
    installations such as ``/usr/bin``.

    Returns:
        The matching ``PATH`` entry (whitespace and quotes stripped), the
        directory of the ffmpeg executable, or ``None``.
    """
    import shutil  # local import: only needed by this helper

    # os.pathsep is ';' on Windows and ':' on POSIX; a missing PATH is treated as empty.
    for raw_entry in os.environ.get('PATH', '').split(os.pathsep):
        entry = raw_entry.strip().strip('"')
        # Compare on a normalized copy so 'C:\\ffmpeg\\bin\\' and '/opt/ffmpeg/bin' both match.
        normalized = entry.replace('\\', '/').rstrip('/')
        if normalized.endswith('ffmpeg/bin'):
            return entry

    ffmpeg_executable = shutil.which('ffmpeg')
    if ffmpeg_executable is not None:
        return os.path.dirname(ffmpeg_executable)
    return None


def check_presence_of_ffmpeg_environment_variable(verbose=True):
    """Report whether an ffmpeg directory is known and exists on disk.

    Args:
        verbose: When true, print a success or help message describing the result.

    Returns:
        ``True`` if ``get_ffmpeg_directory()`` yields a path that exists,
        otherwise ``False``.
    """
    ffmpeg_dir = get_ffmpeg_directory()
    found_message = f'✔️ ffmpeg environment variable is correctly set and the ffmpeg bin path has been found under:\n{ffmpeg_dir}'
    missing_message = '''❌ The ffmpeg environment variable could not be found. \n 
              This indicates that ffmpeg, which is required for running Whisper, is not installed on your system. \n
              Please download it first from: https://ffmpeg.org \n\n
              If you have downloaded and installed ffmpeg, you need to set the required environment variable for it so 
              that Whisper can access it. For convenience, you can use the following method which tries to do this step 
              automatically. However, this only works for Windows...
              '''

    # A directory only counts when it was found AND is actually present on disk.
    is_available = ffmpeg_dir is not None and Path(ffmpeg_dir).exists()
    if verbose:
        print(found_message if is_available else missing_message)
    return is_available

In [None]:
# Run the ffmpeg check with the default verbose=True: it prints a status message
# and evaluates to True/False.
check_presence_of_ffmpeg_environment_variable()

In [None]:
def setup_ffmpeg_environment_variable():
    """Interactively make a user-supplied ffmpeg bin directory available on ``PATH``.

    Does nothing (apart from printing a note) when
    ``check_presence_of_ffmpeg_environment_variable`` already succeeds.
    Otherwise the user is prompted until an existing directory is entered.

    * POSIX: the directory is appended to ``PATH`` of the current process, so
      child processes started from this session inherit it. Nothing is
      persisted.
    * Windows: an elevated ``cmd.exe`` runs ``setx /M PATH`` to persist the
      change; the notebook process must be restarted to see it.

    Raises:
        AssertionError: On Windows, when the resulting ``PATH`` string would
            reach the 1024-character limit of ``setx``.
    """
    if check_presence_of_ffmpeg_environment_variable(verbose=False):
        print('ffmpeg seems to be correctly installed on you machine. Hence, there is no need to run this method.')
        return

    msg_specify_ffmpeg_path = 'If you already downloaded ffmpeg, please specify the ffmpeg bin path: '
    msg_invalid_ffmpeg_path = 'Invalid path, please try again...'

    def _ask_for_ffmpeg_path():
        # Paths copied from a file manager are often wrapped in quotes; drop them and stray whitespace.
        return input(msg_specify_ffmpeg_path).strip().strip('"\'')

    # Since a valid ffmpeg environment variable was not found, we must create it in order to run Whisper.
    # Only an existing directory is a meaningful PATH entry.
    ffmpeg_bin_path = _ask_for_ffmpeg_path()
    while not ffmpeg_bin_path or not Path(ffmpeg_bin_path).is_dir():
        print(msg_invalid_ffmpeg_path)
        ffmpeg_bin_path = _ask_for_ffmpeg_path()

    # Linux / macOS: running `export` in a subshell would be lost as soon as that shell
    # exits, so PATH of the current process is extended instead.
    if os.name == 'posix':
        current_path = os.environ.get('PATH', '')
        os.environ['PATH'] = os.pathsep.join(entry for entry in (current_path, ffmpeg_bin_path) if entry)
        print('The ffmpeg bin path has been added to PATH for the current session only. '
              'Add it to your shell profile to make the change permanent.')

    # Windows..
    if os.name == 'nt':
        import win32com.shell.shell as shell

        # Terminate if the path-string is longer than the 1024 character limitation of the setx-command.
        # The extra 1 accounts for the ';' separator that is inserted before the new entry.
        # See: https://superuser.com/questions/387619/overcoming-the-1024-character-limit-with-setx
        if len(os.environ.get('PATH', '')) + 1 + len(ffmpeg_bin_path) >= 1024:
            raise AssertionError(
                'The total length of the path string, including the path of the ffmpeg binary, '
                'exceeds the 1024-character limit of the setx command. Therefore, the automatic setup '
                'of the ffmpeg environment variable cannot proceed and must be performed manually.')

        # Execute cmd in admin mode in order to set the ffmpeg environment variable via the setx command.
        # NOTE(review): %PATH% is expanded by cmd.exe to the combined PATH of that process,
        # which is then written to the machine-level PATH - confirm this is intended.
        command = f'setx /M PATH "%PATH%;{ffmpeg_bin_path}"'
        shell.ShellExecuteEx(lpVerb='runas', lpFile='cmd.exe', lpParameters=f'/c {command}')

        print(f"{Fore.RED}{Style.BRIGHT}Attention:{Style.RESET_ALL} To be able to detect the ffmpeg environment variable "
              "system-wide, you have to restart this Jupyter notebook and it's respective process!")

In [None]:
setup_ffmpeg_environment_variable()

### Load the models for spaCy and Whisper

In [None]:
# Load the spaCy pipeline that is used below to split the transcript into sentences.
# The model must match the language of the audio file; 'en_core_web_sm' is an English model.
# For available models see: https://spacy.io/usage/models
nlp = spacy.load('en_core_web_sm') 

# Load the Whisper speech-to-text model. Transcription quality depends on the chosen model.
# The models offered by OpenAI are listed at:
# https://github.com/openai/whisper
model = whisper.load_model("large")

### Select audio file

In [None]:
def download_file(url, filename, timeout=30):
    """Download ``url`` and store the response body in ``filename``.

    Args:
        url: Address of the file to download.
        filename: Path of the local file to (over)write in binary mode.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status or with
            any status other than 200.
        requests.RequestException: On connection problems or timeouts.
    """
    headers = {'User-Agent': 'CoolBot/0.0 (https://example.org/coolbot/; coolbot@example.org)'}
    # stream=True avoids holding the whole file in memory; the context manager
    # releases the connection even when an exception is raised.
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
        # Raises for 4xx/5xx responses.
        response.raise_for_status()
        # Any other non-200 status would previously be ignored silently, leaving no file behind.
        if response.status_code != 200:
            raise requests.HTTPError(
                f'Unexpected HTTP status {response.status_code} for url: {url}', response=response)
        with open(filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                file.write(chunk)

In [None]:
# Additional speeches can be found here: https://commons.wikimedia.org/wiki/Category:Audio_files_of_speeches
# ----------------------------------------------------------------------------------------------------------
url = 'https://upload.wikimedia.org/wikipedia/commons/7/77/Peter_Erdi_English_voice_-_Winner.ogg'
# Use the last component of the URL path as local file name, with percent-escapes decoded.
audio_filename = unquote(Path(urlparse(url).path).name)
# Store the file under that name (relative path, i.e. in the current working directory).
download_file(url, audio_filename)

### Transcribe audio file

In [None]:
# Transcribe the downloaded audio file with Whisper and keep only the 'text' entry of the result.
text = model.transcribe(audio_filename)['text']

### Print transcribed sentences

In [None]:
# Split the whitespace-trimmed transcript into sentences with spaCy and print one per line.
print(*nlp(text.strip()).sents, sep='\n')

### Play the audio file to verify the transcription quality

In [None]:
import subprocess

# Start ffplay on the downloaded audio file without blocking the notebook.
# If no ffmpeg directory is known, rely on 'ffplay' being resolvable through PATH
# instead of building a bogus 'None\\ffplay' path.
ffmpeg_directory = get_ffmpeg_directory()
ffplay_executable = 'ffplay' if ffmpeg_directory is None else os.path.join(ffmpeg_directory, 'ffplay')

# Passing the arguments as a list avoids shell quoting problems with paths that
# contain spaces and works on Windows as well as on POSIX systems.
ffplay_command = [ffplay_executable, os.path.join(os.getcwd(), audio_filename)]
ffplay_process = subprocess.Popen(ffplay_command)