In [2]:
!pip install yt-dlp openai-whisper torch



In [1]:
import os
import yt_dlp
import whisper

In [3]:
# ------------------------------
# STEP 1: Download audio from YouTube
# ------------------------------
def download_audio(youtube_url, output_path="audio.mp3"):
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": "temp.%(ext)s",
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192",
            }
        ],
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([youtube_url])

    # Rename downloaded file to fixed name
    for f in os.listdir("."):
        if f.startswith("temp") and f.endswith(".mp3"):
            os.rename(f, output_path)
            break
    return output_path

In [4]:
# ------------------------------
# STEP 2: Transcribe audio using Whisper
# ------------------------------
def transcribe_audio(audio_path, model_size="base", output_file="transcript.txt"):
    print(f"Loading Whisper model ({model_size})...")
    model = whisper.load_model(model_size)

    print("Transcribing...")
    result = model.transcribe(audio_path)

    # Save transcription
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(result["text"])

    print(f"✅ Transcription saved to {output_file}")
    return result["text"]

In [7]:
# ------------------------------
# STEP 3: Run the pipeline
# ------------------------------
if __name__ == "__main__":
    YOUTUBE_URL = "https://www.youtube.com/shorts/RgaWpx87fJg?feature=share"  # replace with your link

    audio_file = download_audio(YOUTUBE_URL, "audio.mp3")
    transcription = transcribe_audio(audio_file, model_size="base", output_file="transcript.txt")

    print("\nSample Transcript (first 500 chars):\n")
    print(transcription[:500])

[youtube] Extracting URL: https://www.youtube.com/shorts/RgaWpx87fJg?feature=share
[youtube] RgaWpx87fJg: Downloading webpage
[youtube] RgaWpx87fJg: Downloading tv client config
[youtube] RgaWpx87fJg: Downloading tv player API JSON
[youtube] RgaWpx87fJg: Downloading ios player API JSON
[youtube] RgaWpx87fJg: Downloading m3u8 information
[info] RgaWpx87fJg: Downloading 1 format(s): 251
[download] Destination: temp.webm
[download] 100% of    1.10MiB in 00:00:00 at 1.14MiB/s   
[ExtractAudio] Destination: temp.mp3
Deleting original file temp.webm (pass -k to keep)
Loading Whisper model (base)...
Transcribing...




✅ Transcription saved to transcript.txt

Sample Transcript (first 500 chars):

 What are the first words you should say in a speech? And what are the last words you should say in a speech? I guarantee if you go to conferences, 19 out of 20 speakers will start in one of these ways. Number one, my name is Conor Neal, I'm from Tango and this talk is about the latest trend in monitoring strategies. All of you are sitting with a piece of paper in front of you that says, I'm Conor Neal, I've come from Ireland and I'm going to talk about Tango zero four and this. So by repeating 
