# Imports

In [1]:
from pytube import YouTube
from pydub import AudioSegment
import os
import whisper
import wave
from spleeter.separator import Separator
import tensorflow as tf
tf.get_logger().setLevel('ERROR')

# Functions

In [2]:
def transform_audio_format(input_file_path, output_dir_path):
    """Re-encode an audio file as mono, 16-bit, 16 kHz WAV.

    Args:
        input_file_path: Path of the audio file to read.
        output_dir_path: Destination passed straight to ``export``. Despite
            the name, callers in this notebook pass the full path of the
            ``.wav`` file to write, not a directory.
    """
    # Each setter returns a new segment, so the conversion can be expressed
    # as a single pipeline: 1 channel -> 2-byte samples -> 16000 Hz.
    converted = (
        AudioSegment.from_file(input_file_path)
        .set_channels(1)
        .set_sample_width(2)
        .set_frame_rate(16000)
    )
    converted.export(output_dir_path, format="wav")


def verify_audio_format(wav_file_path):
    """Check that a file is a mono, 16-bit, 16 kHz WAV file.

    The extension is checked first so that files which are obviously not WAV
    are rejected without being parsed. A file that carries a ``.wav``
    extension but cannot be parsed as WAV (corrupt, truncated, or another
    format in disguise) is reported as invalid instead of raising.

    Args:
        wav_file_path: Path of the file to inspect.

    Returns:
        True when the file has a ``.wav`` extension, one channel, a sample
        width of 2 bytes and a frame rate of 16000 Hz; False otherwise.

    Raises:
        OSError: If a ``.wav`` path cannot be opened at all (for example,
            the file does not exist).
    """
    is_valid = False

    if wav_file_path.lower().endswith('.wav'):
        try:
            with wave.open(wav_file_path, 'rb') as file:
                is_valid = (
                    file.getnchannels() == 1 and
                    file.getsampwidth() == 2 and
                    file.getframerate() == 16000
                )
        except (wave.Error, EOFError):
            # wave raises Error for a malformed header and EOFError for an
            # empty/truncated file; both mean "not a usable WAV".
            is_valid = False

    if is_valid:
        print('\nFile is in correct format.\n')
    else:
        print('\nFile is not in correct format.\n')
    return is_valid


def download_youtube_audio(url, output_dir_path):
    """Download the first audio-only stream of a YouTube video.

    Args:
        url: URL of the YouTube video.
        output_dir_path: Directory the stream is downloaded into.

    Returns:
        Whatever ``download`` returns; the notebook uses it as the path of
        the downloaded file.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    audio_stream = YouTube(url).streams.filter(only_audio=True).first()

    # first() yields None for an empty query; fail with a clear message
    # instead of an AttributeError on None.download.
    if audio_stream is None:
        raise ValueError(f'No audio-only stream available for: {url}')

    return audio_stream.download(output_path=output_dir_path)


def transcribe_audio(vocal_file_path):
    """Transcribe a WAV file to text with the Whisper ``base`` model.

    Args:
        vocal_file_path: Path of the audio file to transcribe. It must pass
            ``verify_audio_format``.

    Returns:
        The ``'text'`` entry of the transcription result, or None when
        transcription fails (the error is printed, not raised).

    Raises:
        Exception: If the file does not pass ``verify_audio_format``.
    """
    format_ok = verify_audio_format(vocal_file_path)
    if not format_ok:
        raise Exception('Cannot transcribe audio with invalid file format.')

    model = whisper.load_model('base')

    # Best effort: any failure while transcribing is reported on stdout and
    # the caller receives None.
    text = None
    try:
        text = model.transcribe(vocal_file_path)['text']
    except Exception as err:
        print('Unable to transcribe audio.')
        print(err)
    return text

# Logic

In [3]:
# url = 'https://www.youtube.com/watch?v=ry9SYnV3svc' # talking video for test
# url = 'https://www.youtube.com/watch?v=pat2-f0BXCg' # Asthenia - blink-182
url = 'https://www.youtube.com/watch?v=FVdjZYfDuLE' # Wonderwall - Oasis

# create the output directory to house audio file downloads
output_dir_path = 'Output/'
os.makedirs(output_dir_path, exist_ok=True)

# download the audio-only stream (saved as an .mp4 file) from a youtube url
mp4_file_path = download_youtube_audio(url, output_dir_path)

# os.path.basename strips the directory part using the platform's own path
# separators, so the same line works on POSIX and Windows paths
mp4_file_basename = os.path.basename(mp4_file_path)
print('DOWNLOADED FILE:', mp4_file_basename)

# get the name of the file without file path or file extension
file_name, _ = os.path.splitext(mp4_file_basename)

# set the full path of the .wav file for the audio
wav_file_path = os.path.join(output_dir_path, file_name + '.wav')

# convert the file from .mp4 to .wav (mono, 16-bit, 16 kHz)
transform_audio_format(mp4_file_path, wav_file_path)
print('CONVERTED FILE:', wav_file_path)

DOWNLOADED FILE: Wonderwall.mp4
CONVERTED FILE: Output/Wonderwall.wav


In [4]:
# separate the vocals from the music (2 stems: vocals + accompaniment);
# write under the configured output directory instead of repeating its literal
separator = Separator('spleeter:2stems')
separator.separate_to_file(wav_file_path, output_dir_path);

INFO:spleeter:File Output/Wonderwall/accompaniment.wav written succesfully
INFO:spleeter:File Output/Wonderwall/vocals.wav written succesfully


In [5]:
# the separated stems live in a sub-directory named after the input file;
# derive the paths from output_dir_path so they follow the configured directory
stems_dir_path = os.path.join(output_dir_path, file_name)
vocals_file_path = os.path.join(stems_dir_path, 'vocals.wav')
accompaniment_file_path = os.path.join(stems_dir_path, 'accompaniment.wav')

In [6]:
# transcribe_audio rejects anything that is not mono, 16-bit, 16 kHz WAV, so
# re-encode the vocals stem first (this overwrites the stem file in place)
transform_audio_format(
    input_file_path=vocals_file_path,
    output_dir_path=vocals_file_path,
)

# transcribe the converted .wav vocals into text
transcription = transcribe_audio(vocal_file_path=vocals_file_path)
print(transcription)


File is in correct format.





 I'm gonna be the day that they're gonna throw it back to you I'm gonna be the day that they're gonna throw it back to you Backbeat the weather's on the street that the fire in your heart is loud I'm sure you've heard it all before but you never really had it's loud I don't believe that anybody feels the way I do about you now And all the roads we have to walk a wine game And all the lights I need to spell a blind game There are many things that I'd like to say to you but I don't know how Cause maybe you're gonna be the one that saves me And I'll throw you my wonder wall Two days gonna be the day but they'll never throw it back to you By now you should have somehow realized what you're not to do I don't believe that anybody feels the way I do about you now And all the roads I need you there were winding And all the lights I'd like to play a blind game There are many things that I'd like to say to you but I don't know how I said make me You're gonna be the one that saves me And I'll thr