In [1]:
!pip install yt-dlp moviepy whisper-timestamped numpy PyQt5



In [2]:
import os
import json
import numpy as np
import whisper_timestamped
from moviepy.editor import VideoFileClip
import yt_dlp

Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.



In [3]:
def download_audio_from_youtube(url):
    try:
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': '%(id)s.%(ext)s',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'wav',
                'preferredquality': '192',
            }],
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            audio_file = f"{info_dict['id']}.wav"
            return audio_file
        
    except Exception as e:
        return str(e)

In [4]:
def extract_audio_from_video(video_file):
    try:
        audio_file = "extracted_audio.wav"
        video_clip = VideoFileClip(video_file)
        video_clip.audio.write_audiofile(audio_file, codec='pcm_s16le', ffmpeg_params=["-ac", "1", "-ar", "16000"])
        return audio_file
    
    except Exception as e:
        return str(e)

In [5]:
def process_audio(audio_file):
    try:
        print("Processing audio file:", os.path.abspath(audio_file))
        if not os.path.exists(audio_file):
            return "Audio file does not exist."

        audio = whisper_timestamped.load_audio(audio_file)
        audio = audio / np.max(np.abs(audio))  # Normalize audio
        model = whisper_timestamped.load_model("base", device="cpu")
        result = whisper_timestamped.transcribe_timestamped(model, audio, language="en")

        # Save results to SRT file
        writer = whisper_timestamped.utils.get_writer("srt", ".")
        writer(result, "output")
        print(f"SRT file saved: transcription.srt")

        # Save results to JSON file
        json_output_file = "output.json"
        with open(json_output_file, 'w', encoding='utf-8') as json_file:
            json.dump(result, json_file, indent=2, ensure_ascii=False)
        print(f"JSON file saved: {json_output_file}")

        return "Transcription completed"
    
    except Exception as e:
        return str(e)

In [6]:
youtube_url = 'https://www.youtube.com/watch?v=FKvVIThWYYo'  # Change this to your YouTube URL
audio_file = download_audio_from_youtube(youtube_url)
if audio_file:
    result = process_audio(audio_file)
    print(result)

[youtube] Extracting URL: https://www.youtube.com/watch?v=FKvVIThWYYo
[youtube] FKvVIThWYYo: Downloading webpage
[youtube] FKvVIThWYYo: Downloading ios player API JSON
[youtube] FKvVIThWYYo: Downloading mweb player API JSON
[youtube] FKvVIThWYYo: Downloading m3u8 information
[info] FKvVIThWYYo: Downloading 1 format(s): 251
[download] Destination: FKvVIThWYYo.webm
[download] 100% of   14.64MiB in 00:00:06 at 2.28MiB/s   
[ExtractAudio] Destination: FKvVIThWYYo.wav
Deleting original file FKvVIThWYYo.webm (pass -k to keep)
Processing audio file: d:\Study\3\Natural language processing\Group-project\FKvVIThWYYo.wav


100%|██████████| 104858/104858 [04:39<00:00, 375.25frames/s]


SRT file saved: transcription.srt
JSON file saved: output.json
Transcription completed


In [7]:
video_file = 'path_to_your_video_file.mp4'  # Change this to your video file path
audio_file = extract_audio_from_video(video_file)
if audio_file:
    result = process_audio(audio_file)
    print(result)

Processing audio file: d:\Study\3\Natural language processing\Group-project\MoviePy error: the file path_to_your_video_file.mp4 could not be found!
Please check that you entered the correct path
Audio file does not exist.
