# ML5-Challenge2: YouTube/Podcast Transcription

In [17]:
import os
import subprocess
import whisper
import yt_dlp
from datetime import datetime

In [18]:

# Set up directories
DOWNLOAD_DIR = os.path.join(os.getcwd(), "media_downloads")
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Function to download YouTube audio
def download_youtube_audio(video_url, download_dir):
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': os.path.join(download_dir, '%(id)s.%(ext)s'),
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_url, download=True)
        audio_path = os.path.join(download_dir, f"{info_dict['id']}.mp3")
        return audio_path

# Transcribe using Whisper
def transcribe_audio(audio_path, model_name="tiny"):
    model = whisper.load_model(model_name)
    start_time = datetime.utcnow()
    
    result = model.transcribe(audio_path)
    end_time = datetime.utcnow()
    
    transcription_time = (end_time - start_time).total_seconds()
    return result['text'], transcription_time

# Convert transcription to Markdown
def transcription_to_markdown(transcription, audio_path, transcription_time):
    audio_length = subprocess.run(
        ['ffmpeg', '-i', audio_path, '-f', 'null', '-'],
        stderr=subprocess.PIPE,
        text=True
    ).stderr.split()
    
    speed_ratio = round(len(transcription.split()) / transcription_time, 2)
    markdown = f"Transcribed by Whisper at {speed_ratio}x speed.\n\n## Transcript\n\n{transcription}"
    return markdown


In [19]:

# Example usage
if __name__ == "__main__":
    # Step 1: Download audio
    youtube_url = "https://www.youtube.com/watch?v=eekrsC1Do2E"  # Replace with your URL
    print("Downloading YouTube audio...")
    audio_file = download_youtube_audio(youtube_url, DOWNLOAD_DIR)
    print(f"Audio downloaded: {audio_file}")
    
    # Step 2: Transcribe audio
    print("Transcribing audio...")
    transcription, processing_time = transcribe_audio(audio_file)
    print("Transcription complete.")
    
    # Step 3: Generate Markdown
    print("Generating Markdown...")
    markdown_output = transcription_to_markdown(transcription, audio_file, processing_time)
    print(markdown_output)

Downloading YouTube audio...
[youtube] Extracting URL: https://www.youtube.com/watch?v=eekrsC1Do2E
[youtube] eekrsC1Do2E: Downloading webpage
[youtube] eekrsC1Do2E: Downloading ios player API JSON
[youtube] eekrsC1Do2E: Downloading mweb player API JSON
[youtube] eekrsC1Do2E: Downloading m3u8 information
[info] eekrsC1Do2E: Downloading 1 format(s): 251
[download] Destination: e:\school\Private\AI_Challenges\ML 5 Challenge\media_downloads\eekrsC1Do2E.webm
[download] 100% of  458.46KiB in 00:00:00 at 2.81MiB/s   
[ExtractAudio] Destination: e:\school\Private\AI_Challenges\ML 5 Challenge\media_downloads\eekrsC1Do2E.mp3
Deleting original file e:\school\Private\AI_Challenges\ML 5 Challenge\media_downloads\eekrsC1Do2E.webm (pass -k to keep)
Audio downloaded: e:\school\Private\AI_Challenges\ML 5 Challenge\media_downloads\eekrsC1Do2E.mp3
Transcribing audio...


  checkpoint = torch.load(fp, map_location=device)
  start_time = datetime.utcnow()


Transcription complete.
Generating Markdown...
Transcribed by Whisper at 19.17x speed.

## Transcript

 I've been answering my email all the goddamn day. I've been answering my email because my work gets done that way. Can't you feel the fingers aching? Type into early in the morning. Can't you see the letters blurring? It's just that I had four porn.


  end_time = datetime.utcnow()
