# Download YouTube Videos as Audio

In [None]:
import yt_dlp
import os
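# pip install ffmpeg-python  (NOTE: this is only a Python wrapper; the FFmpeg binaries themselves must also be installed -- see 'ffmpeg_location' below)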
# Set the output directory (relative to the current working directory) and
# create it up front so yt-dlp has somewhere to write.
output_path = "audio"
os.makedirs(output_path, exist_ok=True)

# Define yt-dlp options and specify the path to FFmpeg explicitly
ydl_opts = {
    'format': 'bestaudio/best',  # Prefer the best audio-only stream, else the best overall
    'outtmpl': f'{output_path}/%(title)s.%(ext)s',  # <output_path>/<video title>.<extension>
    # After the download, have FFmpeg extract the audio track as 192 kbps MP3.
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }],
    # Folder holding the ffmpeg/ffprobe executables. This must be a plain
    # absolute path: a backslash in front of the drive letter ("\C:\...") is
    # not a valid Windows path, and yt-dlp then cannot locate FFmpeg for the
    # MP3 post-processing step. The path is machine-specific; adjust as needed.
    'ffmpeg_location': r'C:\Users\s83me\Downloads\FFmpeg\bin',
}

# Function to download audio using yt-dlp
def download_audio_yt_dlp(video_url):
    """Download a single URL with yt-dlp using the module-level ``ydl_opts``.

    Args:
        video_url: The URL to fetch; it is handed to yt-dlp as a
            one-element download list.

    Returns:
        None. The function is called for its side effect only.
    """
    targets = [video_url]
    # A fresh YoutubeDL instance per call, released when the block exits.
    with yt_dlp.YoutubeDL(ydl_opts) as downloader:
        downloader.download(targets)

# Individual videos to fetch
video_urls = [
    "https://youtu.be/hg5_BPjJTFY",
    "https://youtu.be/QMErl8l6mnw",
    "https://www.youtube.com/watch?v=PyrBzUaRO88",
]

# Fetch the videos one at a time, announcing each one before and after
for video_url in video_urls:
    print(f"Downloading audio from {video_url}...")
    download_audio_yt_dlp(video_url)
    print(f"Downloaded audio from: {video_url}")

# The playlist URL is handed to yt-dlp through the same helper, using the
# same options as the single videos above.
playlist_url = "https://www.youtube.com/playlist?list=PLlj5bsLjtj-L3vGAECzBJqTLT3uxl2hnL"
print(f"Downloading playlist from {playlist_url}...")
download_audio_yt_dlp(playlist_url)
print("Playlist download complete.")

In [None]:
# Alternative: download from the command line (run these in a terminal, not in this cell)

# yt-dlp -f bestaudio --extract-audio --audio-format mp3 "https://youtu.be/9qIMQi1E7x0?si=3yuko8ekBrygjj_-"

# yt-dlp -f bestaudio --extract-audio --audio-format mp3 --ffmpeg-location "C:\Users\s83me\Downloads\FFmpeg\bin" "https://youtu.be/bWfCHhm7PWk?si=DgAMA8FdePP6A5f8"

# Transcribe Audio Files Using OpenAI Whisper (local model)

In [10]:
# Manually define the list of audio files in the exact order you want to transcribe.
# Each entry is a path relative to the working directory, inside the "audio" folder.
# NOTE(review): no code visible in this file reads `audio_files`; the
# transcription cell walks the folder listing instead -- confirm whether this
# list is still needed or should drive the transcription order.
audio_files = [
    # Numbered "Wuthering Heights Guide" episodes, 1 through 10
    "audio/1.Wuthering Heights Guide ¦ Summary & Biography of Emily Bronté Ep 1.webm",
    "audio/2.Unreliable Narrators in Wuthering Heights： Nelly Dean and Lockwood ¦ WH Guide Ep.2 [9WISqpOHUo4].webm",
    "audio/3.Is Wuthering Heights a Love Story？ ¦ WH Guide, Ep.3 [I95_d8Xsj78].webm",
    "audio/4.Revenge, Forgiveness & Religion in Wuthering Heights ¦ WH Guide, Ep.4 [MJGZBg8mwnM].webm",
    "audio/5.Nature VS Civilisation ¦ Wuthering Heights ¦ WH Guide Ep.5 [g8Gow8Sdp_0].webm",
    "audio/6.Catherine Earnshaw ¦ Character Study ¦ Wuthering Heights Guide Ep.6 [Y1fetLEOEX8].webm",
    "audio/7.Heathcliff ¦ Character Study ¦ Wuthering Heights Guide Ep 7 [ROGC7w5qUpg].webm",
    "audio/8.Edgar & Isabella Linton ¦ Character Studies ¦ Wuthering Heights Guide Ep.8 [86PF3wnotqo].webm",
    "audio/9.Cathy, Hareton, and Linton ¦ Character Studies ¦ Wuthering Heights Guide Ep 9.webm",
    "audio/10.Ghosts, Magic & Witches ¦ Wuthering Heights Guide Ep 10.webm",       
    # Standalone (unnumbered) videos
    "audio/WUTHERING HEIGHTS by Emily Brontë ｜ Book Review [PyrBzUaRO88].webm",
    "audio/WUTHERING HEIGHTS – Structure & Narrative Technique ｜ Emily Brontë WUTHERING HEIGHTS novel analysis [hg5_BPjJTFY].webm",
    "audio/How to Read Wuthering Heights by Emily Brontë (10 Tips) [QMErl8l6mnw].webm",
    "audio/Wuthering Heights： Emily Brontë’s Misunderstood Masterpiece ｜ SLICE WHO ｜ FULL DOCUMENTARY [9qIMQi1E7x0].webm"
    
]

In [None]:
import os
import re

import whisper

# Folder whose audio files are transcribed (machine-specific absolute path).
file_path = r"C:\Users\s83me\Downloads\Project 3 Wuthering Heights\audio"
# Load the Whisper "base" model once and reuse it for every file.
model = whisper.load_model("base")


def _natural_sort_key(name):
    """Return a sort key that compares digit runs numerically.

    With this key "2.intro" sorts before "10.outro", which a plain string
    sort gets wrong. Text portions are compared case-insensitively.
    """
    # re.split with a capturing group always alternates text, digits, text, ...
    # so odd positions are digit runs and equal positions share a type.
    return [
        int(token) if index % 2 else token.casefold()
        for index, token in enumerate(re.split(r"(\d+)", name))
    ]


# List to hold one (audio_path, text) pair per transcribed file
transcriptions = []

# Transcribe each audio file in the specified folder.
# os.listdir() returns entries in arbitrary order, so sort explicitly: the
# results are later written out "in order", and numbered episodes must run
# 1, 2, ..., 10 rather than 1, 10, 2.
# NOTE(review): a hand-ordered `audio_files` list exists in an earlier cell but
# is not consulted here; confirm whether that exact order is required.
for audio_file in sorted(os.listdir(file_path), key=_natural_sort_key):
    audio_path = os.path.join(file_path, audio_file)
    if not os.path.isfile(audio_path):
        # Sub-folders and other non-file entries cannot be transcribed.
        print(f"Skipping {audio_path} (not a file).")
        continue
    print(f"Transcribing {audio_path}...")
    result = model.transcribe(audio_path)
    transcriptions.append((audio_path, result["text"]))
    print(f"Transcription for {audio_path} completed.")

# Write every collected transcription, in list order, to one UTF-8 text file.
output_file = "Ordered_Transcriptions.txt"
# Visual divider placed after each transcription: a rule of 50 "=" characters.
entry_divider = "\n" + "=" * 50 + "\n\n"

with open(output_file, "w", encoding="utf-8") as out:
    for source_path, text in transcriptions:
        # Header line names the file only, not its full path.
        heading = f"Transcription for {os.path.basename(source_path)}:\n"
        out.write(heading + text + entry_divider)

print(f"All transcriptions saved to {output_file}")
