### Import required libraries

In [1]:
# --- Libraries ---
import yt_dlp
from youtube_transcript_api import YouTubeTranscriptApi
import os
import pickle

### Function to extract video IDs from https://www.youtube.com/@pantelism using yt_dlp

In [2]:
# --- Function to get video IDs from a YouTube channel ---
def get_video_ids_with_ytdlp(channel_url):
    ydl_opts = {
        'quiet': True,
        'extract_flat': True,
        'skip_download': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(channel_url, download=False)
        return [entry['id'] for entry in info['entries']]

# --- Parameters ---
channel_url = "https://www.youtube.com/@pantelism"
output_dir = "VideoProj_transcripts"
os.makedirs(output_dir, exist_ok=True)

# --- Fetch video IDs ---
video_ids = get_video_ids_with_ytdlp(channel_url)
print(f"Found {len(video_ids)} video IDs.")

Found 98 video IDs.


### Download transcripts for each video

In [3]:
# --- Download transcripts ---
available_transcripts = []
unavailable_transcripts = []

print("Starting transcript download...")
for idx, vid in enumerate(video_ids):
    try:
        # Fetch transcript (list of dicts with text, start, duration)
        transcript = YouTubeTranscriptApi.get_transcript(vid)

        # Save raw transcript to .pkl
        filepath = os.path.join(output_dir, f"{vid}_transcript.pkl")
        with open(filepath, "wb") as f:
            pickle.dump(transcript, f)

        print(f"[{idx+1}/{len(video_ids)}]  Saved: {filepath}")
        available_transcripts.append(vid)

    except Exception as e:
        print(f"[{idx+1}/{len(video_ids)}]  No transcript for {vid} — {str(e)}")
        unavailable_transcripts.append((vid, str(e)))

# --- Save logs ---
with open("transcript_log_available.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(available_transcripts))

with open("transcript_log_unavailable.txt", "w", encoding="utf-8") as f:
    for vid, err in unavailable_transcripts:
        f.write(f"{vid} — {err}\n")

print("\nDone.")
print(f"Transcripts downloaded: {len(available_transcripts)}")
print(f"No transcripts: {len(unavailable_transcripts)}")

Starting transcript download...
[1/98]  No transcript for PuTxxFVae1M — 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=PuTxxFVae1M! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!
[2/98]  No transcript for 4uiyShM1BXg — 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=4uiyShM1BXg! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https: