In [1]:
print("hello")

hello


In [2]:
%pip install yt_dlp

Collecting yt_dlp
  Downloading yt_dlp-2024.12.13-py3-none-any.whl.metadata (172 kB)
Downloading yt_dlp-2024.12.13-py3-none-any.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: yt_dlp
Successfully installed yt_dlp-2024.12.13
Note: you may need to restart the kernel to use updated packages.


In [7]:
from yt_dlp import YoutubeDL

def download_bilibili_audio_from_file(file_path, output_folder="/Users/hy/Downloads/audio_downloads"):
    """
    Downloads audio from Bilibili videos listed in a text file.

    Args:
        file_path (str): Path to the text file containing video URLs.
        output_folder (str): Folder where the audio files will be saved.
    """
    # Read the URLs from the file
    with open(file_path, 'r') as file:
        video_urls = [line.strip() for line in file if line.strip()]

    # Configure download options for audio only
    options = {
        'format': 'bestaudio/best',  # Best available audio format
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',  # Output format (e.g., mp3, aac)
            'preferredquality': '192',  # Audio quality in kbps
        }],
        'outtmpl': f'{output_folder}/%(title)s.%(ext)s',  # Output filename template
        'noplaylist': True,  # Avoid downloading playlists if URL contains one
    }

    # Create the downloader
    with YoutubeDL(options) as ydl:
        for url in video_urls:
            try:
                print(f"Downloading audio from URL: {url}")
                ydl.download([url])
                print("Audio download completed.")
            except Exception as e:
                print(f"Failed to download audio from URL: {url}. Error: {e}")

# Example usage
file_path = "bilibili_video_link.txt"  # Replace with your text file path
download_bilibili_audio_from_file(file_path)


Downloading audio from URL: https://www.bilibili.com/video/BV1We4y1672P/?spm_id_from=333.1387.search.video_card.click
[BiliBili] Extracting URL: https://www.bilibili.com/video/BV1We4y1672P/?spm_id_from=333.1387.search.video_card.click
[BiliBili] 1We4y1672P: Downloading webpage
[BiliBili] BV1We4y1672P: Extracting videos in anthology
[BiliBili] BV1We4y1672P: Downloading wbi sign
[BiliBili] BV1We4y1672P: Downloading video formats for cid 858538728
[BiliBili] Format(s) 1080P 高码率 are missing; you have to become a premium member to download them. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies
[BiliBili] 558877217: Extracting chapters
[info] BV1We4y1672P: Downloading 1 format(s): 30280
[download] Destination: /Users/hy/Downloads/audio_downloads/【雅思口语全真模考】【原创】【雅思考官尼克VS GPA6.5的澳洲留学生】【考官点评】.m4a
[download] 100% of    2.05MiB in 00:00:01 at 1.03MiB/s   
[ExtractAudio] D

In [10]:
import os
import speech_recognition as sr

# Paths
audio_folder = "/Users/hy/Downloads/audio_downloads"  # Path to the folder with MP3 audio files
output_file = "bilibili_audio_transcript.txt"  # Path to the single output text file

# Initialize SpeechRecognition recognizer
recognizer = sr.Recognizer()

def convert_mp3_to_wav(mp3_file, wav_file):
    """
    Use ffmpeg to convert MP3 to WAV format
    """
    try:
        command = f"ffmpeg -i \"{mp3_file}\" -ar 16000 -ac 1 \"{wav_file}\" -y"
        print(f"Running command: {command}")
        result = os.system(command)
        if result != 0:
            raise RuntimeError(f"FFmpeg failed to convert MP3 to WAV for {mp3_file}")
    except Exception as e:
        print(f"Error converting MP3 to WAV: {e}")

def process_audio_file(mp3_path, recognizer, index):
    """
    Convert MP3 to text
    """
    try:
        # Convert MP3 to WAV
        wav_path = mp3_path.replace(".mp3", ".wav")
        convert_mp3_to_wav(mp3_path, wav_path)

        # Load audio file
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)

        # Use Google Speech Recognition to extract text
        asr_text = recognizer.recognize_google(audio_data, language="en-US")

        # Delete temporary WAV file
        os.remove(wav_path)

        # Return ID and text
        return f"{index}|{asr_text}"

    except Exception as e:
        print(f"Error processing {mp3_path}: {e}")
        return f"{index}|Error processing {mp3_path}: {e}"

# Traverse MP3 files in the folder
all_transcriptions = []
index = 8  # Start ID from 1

for audio_name in os.listdir(audio_folder):
    if audio_name.endswith(".mp3"):
        mp3_path = os.path.join(audio_folder, audio_name)
        print(f"Processing file: {mp3_path}")
        transcription = process_audio_file(mp3_path, recognizer, index)
        all_transcriptions.append(transcription)
        index += 1

# Save all transcriptions to the output file
with open(output_file, "w", encoding="utf-8") as output:
    for transcription in all_transcriptions:
        output.write(transcription + "\n")

print(f"MP3-to-text transcription completed! Transcriptions saved to {output_file}")


Processing file: /Users/hy/Downloads/audio_downloads/【雅思口语全真模考】【原创】【雅思考官尼克VS未来的广告界女王】【考官点评】【很有用】.mp3
Running command: ffmpeg -i "/Users/hy/Downloads/audio_downloads/【雅思口语全真模考】【原创】【雅思考官尼克VS未来的广告界女王】【考官点评】【很有用】.mp3" -ar 16000 -ac 1 "/Users/hy/Downloads/audio_downloads/【雅思口语全真模考】【原创】【雅思考官尼克VS未来的广告界女王】【考官点评】【很有用】.wav" -y


ffmpeg version 6.1.2 Copyright (c) 2000-2024 the FFmpeg developers
  built with clang version 17.0.6
  configuration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1726960400982/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --enable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1726960400982/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --disable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass -

Error processing /Users/hy/Downloads/audio_downloads/【雅思口语全真模考】【原创】【雅思考官尼克VS未来的广告界女王】【考官点评】【很有用】.mp3: 
Processing file: /Users/hy/Downloads/audio_downloads/【雅思口语全真模考】【原创】【雅思考官尼克VS13岁天才少女】【考官点评 】 【记下来】.mp3
Running command: ffmpeg -i "/Users/hy/Downloads/audio_downloads/【雅思口语全真模考】【原创】【雅思考官尼克VS13岁天才少女】【考官点评 】 【记下来】.mp3" -ar 16000 -ac 1 "/Users/hy/Downloads/audio_downloads/【雅思口语全真模考】【原创】【雅思考官尼克VS13岁天才少女】【考官点评 】 【记下来】.wav" -y


ffmpeg version 6.1.2 Copyright (c) 2000-2024 the FFmpeg developers
  built with clang version 17.0.6
  configuration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1726960400982/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --enable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1726960400982/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --disable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass -

Error processing /Users/hy/Downloads/audio_downloads/【雅思口语全真模考】【原创】【雅思考官尼克VS13岁天才少女】【考官点评 】 【记下来】.mp3: 
MP3-to-text transcription completed! Transcriptions saved to bilibili_audio_transcript.txt
