In [17]:
import inspect
from youtube_transcript_api import YouTubeTranscriptApi

# Print the names of the plain functions found on YouTubeTranscriptApi.
print([
    name
    for name, _func in inspect.getmembers(YouTubeTranscriptApi, predicate=inspect.isfunction)
])


['__init__', 'fetch', 'list']


In [18]:
from youtube_transcript_api import YouTubeTranscriptApi

video_id = "QVz0tXw7-FE"

# Build a client instance; list() is invoked on the instance.
api = YouTubeTranscriptApi()

# Retrieve the transcript listing for the video.
transcripts = api.list(video_id)

# Show the type of the returned object, then its printable form.
print(type(transcripts), transcripts, sep="\n")


<class 'youtube_transcript_api._transcripts.TranscriptList'>
For this video (QVz0tXw7-FE) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en ("English")[TRANSLATABLE]

(GENERATED)
 - en ("English (auto-generated)")[TRANSLATABLE]

(TRANSLATION LANGUAGES)
 - ar ("Arabic")
 - zh-Hant ("Chinese (Traditional)")
 - nl ("Dutch")
 - fr ("French")
 - de ("German")
 - hi ("Hindi")
 - id ("Indonesian")
 - it ("Italian")
 - ja ("Japanese")
 - ko ("Korean")
 - pt ("Portuguese")
 - ru ("Russian")
 - es ("Spanish")
 - th ("Thai")
 - tr ("Turkish")
 - uk ("Ukrainian")
 - vi ("Vietnamese")


In [1]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build

# Load variables from the .env file before reading the key.
load_dotenv()

# api_key is None when API_KEY is not defined in the environment.
api_key = os.environ.get("API_KEY")
youtube = build('youtube', 'v3', developerKey=api_key)

In [2]:
import re
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound

def get_video_id(url: str) -> str:
    """
    Extract the 11-character video ID from a YouTube URL.

    A ``v=`` query parameter is preferred (``watch?v=<id>``). If there is none,
    the first ``/``-prefixed token that is exactly 11 ID characters long is used
    (``youtu.be/<id>``, ``/embed/<id>``, ``/shorts/<id>``).

    Args:
        url: The YouTube URL to parse.

    Returns:
        The 11-character video ID.

    Raises:
        ValueError: If no video ID can be found in ``url``.
    """
    # The negative lookahead forces the token to END after 11 characters, so a
    # longer token (for example a 24-character channel ID) is rejected instead
    # of being silently truncated to its first 11 characters.
    id_token = r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
    patterns = (
        r"(?:^|[?&])v=" + id_token,  # query parameter, tried first
        r"\/" + id_token,            # path token, used as a fallback
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    raise ValueError("Impossible de trouver l'ID de la vidéo.")

def get_transcript_english(url: str):
    """
    Return the English transcript of a YouTube video as one continuous string.

    Failures are not raised. Disabled subtitles, a missing English transcript,
    or any other exception each produce a descriptive message string instead.
    """
    try:
        vid = get_video_id(url)
        english = YouTubeTranscriptApi().list(vid).find_transcript(['en'])
        # Join the .text of every fetched snippet with single spaces.
        return " ".join(snippet.text for snippet in english.fetch())
    except TranscriptsDisabled:
        return "⚠️ Les sous-titres sont désactivés pour cette vidéo."
    except NoTranscriptFound:
        return "⚠️ Aucun transcript disponible en anglais pour cette vidéo."
    except Exception as e:
        return f"Erreur : {e}"


if __name__ == "__main__":
    video_url = "https://www.youtube.com/watch?v=QVz0tXw7-FE"
    transcript_text = get_transcript_english(url=video_url)

    # Header line followed by a blank line, then a preview of the text.
    print("=== Transcript anglais ===\n")
    # Only the first 1000 characters are shown.
    print(transcript_text[:1000], "...")


=== Transcript anglais ===

What's up, guys? Jeff Cavaliere, ATHLEANX.com. I'm talking to the skinny guys today. Now don't worry, it's going to be helpful
for all, but I'm going to get inside the minds of the skinny guys because I know what the
hell you're thinking. I've been one and I know how to get you a
lot bigger than you are right now because I know the things that you should be focusing
on. Today I'm going to tell you exactly what to
focus on in your workouts if you don’t want to be skinny anymore. The first thing I'm going to tell you to do
– shocker – is get your ass to the gym. Now, it's not about telling you to start working
out. You already know that. You're probably already doing that. I'm saying get yourself to the gym. Face your fears. Remember I said I was going to get in your
head? I know what you're thinking. You're maybe a little nervous to go to the
gym because you're afraid of the ridicule being the skinny guy. You look around, you see all these guys lifting
heavy 

                                            a

In [3]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build

# Read the .env file, then pick the API key out of the environment
load_dotenv()
api_key = os.environ.get("API_KEY")

# YouTube Data API v3 client, authenticated with the developer key
youtube = build("youtube", "v3", developerKey=api_key)

# Function to resolve @handle -> channel ID
def get_channel_id_from_handle(youtube, handle: str) -> str:
    """
    Resolve a channel handle (e.g. "@athleanx") to its channel ID.

    Runs a channel-only search for ``handle`` and returns the ID of the first hit.

    Args:
        youtube: API client exposing ``search().list(...).execute()``.
        handle: Text to search for, typically the channel's "@handle".

    Returns:
        The ``channelId`` of the first search result.

    Raises:
        ValueError: If the search returns no channel.
    """
    # NOTE(review): this is a search query, so the first hit is presumably the
    # best-ranked match rather than a guaranteed exact handle match — verify
    # the resolved ID against the expected channel.
    response = youtube.search().list(
        part="snippet",
        q=handle,
        type="channel",
        maxResults=1
    ).execute()

    # An empty or missing result list would otherwise surface as an opaque
    # IndexError/KeyError; report which handle could not be resolved instead.
    items = response.get("items") or []
    if not items:
        raise ValueError(f"No channel found for handle {handle!r}")
    return items[0]["snippet"]["channelId"]

# Look up the Athlean-X channel ID from its handle
CHANNEL_HANDLE = "@athleanx"
CHANNEL_ID = get_channel_id_from_handle(youtube, handle=CHANNEL_HANDLE)
print("Resolved CHANNEL_ID:", CHANNEL_ID)

# Collect the URL of every video hit the channel search returns, page by page
video_urls = []
next_page_token = None

while True:
    response = youtube.search().list(
        part="id",
        channelId=CHANNEL_ID,
        maxResults=50,   # maximum allowed per request
        order="date",    # newest first
        pageToken=next_page_token
    ).execute()

    for hit in response.get("items", []):
        # Skip any hit whose kind is not a video.
        if hit["id"]["kind"] != "youtube#video":
            continue
        video_id = hit["id"]["videoId"]
        video_urls.append(f"https://www.youtube.com/watch?v={video_id}")

    # A missing or empty nextPageToken ends the loop.
    next_page_token = response.get("nextPageToken")
    if not next_page_token:
        break

print(f"Found {len(video_urls)} videos")
print(video_urls[:10], "...")  # preview of the first 10 URLs


Resolved CHANNEL_ID: UCe0TLA0EsQbE-MjuHXevj2A
Found 23 videos
['https://www.youtube.com/watch?v=ZJ09graxv8U', 'https://www.youtube.com/watch?v=-7TPSr0-yo4', 'https://www.youtube.com/watch?v=Lr9JJNJS7dY', 'https://www.youtube.com/watch?v=Iio94MY5jFM', 'https://www.youtube.com/watch?v=uXTbDvyhbnA', 'https://www.youtube.com/watch?v=72ZQ8buGL6I', 'https://www.youtube.com/watch?v=IlqmuM9FPUU', 'https://www.youtube.com/watch?v=zp1Fus-pgZo', 'https://www.youtube.com/watch?v=a6jkHbYyHgk', 'https://www.youtube.com/watch?v=Y12vQQ7meYs'] ...


In [4]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build

# Read the .env file, then pick the API key out of the environment
load_dotenv()
api_key = os.environ.get("API_KEY")

# YouTube Data API v3 client, authenticated with the developer key
youtube = build("youtube", "v3", developerKey=api_key)

# Resolve @handle -> channel ID
def get_channel_id_from_handle(youtube, handle: str) -> str:
    """
    Resolve a channel handle (e.g. "@athleanx") to its channel ID.

    Runs a channel-only search for ``handle`` and returns the ID of the first hit.

    Args:
        youtube: API client exposing ``search().list(...).execute()``.
        handle: Text to search for, typically the channel's "@handle".

    Returns:
        The ``channelId`` of the first search result.

    Raises:
        ValueError: If the search returns no channel.
    """
    # NOTE(review): this is a search query, so the first hit is presumably the
    # best-ranked match rather than a guaranteed exact handle match — verify
    # the resolved ID against the expected channel.
    response = youtube.search().list(
        part="snippet",
        q=handle,
        type="channel",
        maxResults=1
    ).execute()

    # An empty or missing result list would otherwise surface as an opaque
    # IndexError/KeyError; report which handle could not be resolved instead.
    items = response.get("items") or []
    if not items:
        raise ValueError(f"No channel found for handle {handle!r}")
    return items[0]["snippet"]["channelId"]

# Look up the channel ID from its handle
CHANNEL_HANDLE = "@athleanx"
CHANNEL_ID = get_channel_id_from_handle(youtube, handle=CHANNEL_HANDLE)
print("Resolved CHANNEL_ID:", CHANNEL_ID)

# Step 1: look up the ID of the channel's "uploads" playlist
channel_response = youtube.channels().list(part="contentDetails", id=CHANNEL_ID).execute()

# The playlist ID sits under the first item's contentDetails.relatedPlaylists.
related_playlists = channel_response["items"][0]["contentDetails"]["relatedPlaylists"]
uploads_playlist_id = related_playlists["uploads"]
print("Uploads Playlist ID:", uploads_playlist_id)

# Step 2: walk the uploads playlist page by page and collect every video URL
video_urls = []
next_page_token = None

while True:
    playlist_response = youtube.playlistItems().list(
        part="contentDetails",
        playlistId=uploads_playlist_id,
        maxResults=50,
        pageToken=next_page_token
    ).execute()

    for entry in playlist_response["items"]:
        video_id = entry["contentDetails"]["videoId"]
        video_urls.append(f"https://www.youtube.com/watch?v={video_id}")

    # A missing or empty nextPageToken ends the loop.
    next_page_token = playlist_response.get("nextPageToken")
    if not next_page_token:
        break

print(f"Found {len(video_urls)} videos in total")
print(video_urls[:10], "...")


Resolved CHANNEL_ID: UCe0TLA0EsQbE-MjuHXevj2A
Uploads Playlist ID: UUe0TLA0EsQbE-MjuHXevj2A
Found 1632 videos in total
['https://www.youtube.com/watch?v=ZJ09graxv8U', 'https://www.youtube.com/watch?v=-7TPSr0-yo4', 'https://www.youtube.com/watch?v=F8DErukW4yo', 'https://www.youtube.com/watch?v=A2s4QLmND2E', 'https://www.youtube.com/watch?v=JA3Ys9v4NyI', 'https://www.youtube.com/watch?v=tVA7S5_MeqQ', 'https://www.youtube.com/watch?v=Fz7wUtKh3gQ', 'https://www.youtube.com/watch?v=THqN40NcT8M', 'https://www.youtube.com/watch?v=Yms9gHkXQf8', 'https://www.youtube.com/watch?v=_M0mJjxDI68'] ...


In [5]:
# Write one URL per line to a text file in the working directory.
with open("athleanx_videos.txt", "w") as out_file:
    out_file.writelines(url + "\n" for url in video_urls)

print("Saved all video URLs to athleanx_videos.txt")


Saved all video URLs to athleanx_videos.txt
