In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
!pip install -q git+https://github.com/openai/whisper.git

# yt-dlp をインストール
!pip install -U yt-dlp

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone
Collecting yt-dlp
  Downloading yt_dlp-2025.10.14-py3-none-any.whl.metadata (175 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.9/175.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading yt_dlp-2025.10.14-py3-none-any.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m52.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: yt-dlp
Successfully installed yt-dlp-2025.10.14


In [3]:
import os
import subprocess
import whisper
from tqdm import tqdm

# ==============================
# Settings
# ==============================
# YouTube videos whose audio is downloaded and then transcribed.
YOUTUBE_URLS = [
    "https://www.youtube.com/watch?v=rYEDA3JcQqw"
]

# Folders that receive the downloaded audio and the transcript text files.
AUDIO_DIR = "/content/drive/MyDrive/Colab Notebooks/218_YT_analyzing_adele/audio_files"
OUTPUT_DIR = "/content/drive/MyDrive/Colab Notebooks/218_YT_analyzing_adele/output_text_files"

# ==============================
# Folder setup
# ==============================
os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ==============================
# YouTube audio download
# ==============================
print("🎬 YouTube音声をダウンロード中...")
for url in tqdm(YOUTUBE_URLS):
    try:
        subprocess.run(
            [
                "yt-dlp",
                # -x extracts the audio track; it is converted to mp3.
                "-x", "--audio-format", "mp3",
                # Output template: name each file after the video id.
                "-o", f"{AUDIO_DIR}/%(id)s.%(ext)s",
                url,
            ],
            check=True,
            stdout=subprocess.DEVNULL,
            # Capture stderr instead of discarding it so that the actual
            # yt-dlp error can be shown when the download fails.
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
        )
    except subprocess.CalledProcessError as e:
        print(f"❌ {url} 取得失敗: {e}")
        detail = (e.stderr or "").strip()
        if detail:
            print(detail)
    except Exception as e:
        # Best effort: a failure for one URL (e.g. yt-dlp not being
        # installed) must not stop the remaining downloads.
        print(f"❌ {url} 取得失敗: {e}")

# ==============================
# Whisper transcription
# ==============================
print("\n🔍 Whisperで文字起こし中...")
model = whisper.load_model("base")

# Select the mp3 files up front so the progress bar counts only the files that
# are actually transcribed, and sort them for a deterministic processing order.
audio_files = sorted(
    name for name in os.listdir(AUDIO_DIR) if name.endswith(".mp3")
)

for file in tqdm(audio_files):
    audio_path = os.path.join(AUDIO_DIR, file)
    # splitext swaps only the trailing extension; str.replace would also
    # rewrite any ".mp3" that happens to occur earlier in the file name.
    txt_path = os.path.join(OUTPUT_DIR, os.path.splitext(file)[0] + ".txt")

    try:
        print(f"▶ {file} 文字起こし開始...")
        result = model.transcribe(audio_path)
        text = result["text"]

        with open(txt_path, "w", encoding="utf-8") as f:
            f.write(text)

        print(f"✅ 保存完了: {txt_path}")

    except Exception as e:
        # Best effort: report the failure and continue with the next file.
        print(f"❌ {file} 文字起こし失敗: {e}")

print("\n🎉 全処理完了！ 出力先:")
print(f"📁 {OUTPUT_DIR}")


🎬 YouTube音声をダウンロード中...


100%|██████████| 1/1 [00:19<00:00, 19.97s/it]



🔍 Whisperで文字起こし中...


100%|███████████████████████████████████████| 139M/139M [00:01<00:00, 97.9MiB/s]


▶ rYEDA3JcQqw.mp3 文字起こし開始...


100%|██████████| 1/1 [00:50<00:00, 50.63s/it]

✅ 保存完了: /content/drive/MyDrive/Colab Notebooks/218_YT_analyzing_adele/output_text_files/rYEDA3JcQqw.txt

🎉 全処理完了！ 出力先:
📁 /content/drive/MyDrive/Colab Notebooks/218_YT_analyzing_adele/output_text_files



