In [48]:
from youtube_transcript_api import YouTubeTranscriptApi

In [49]:
def extract_yt_transcript(yt_url):
    try:
        video_id = yt_url.split('=')[1]
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_fulltxt = ''
        for transcript in transcript_list:
            transcript_fulltxt +=transcript['text']
        return transcript_fulltxt
    except Exception as e:
        raise e

In [50]:
# image id = "http://img.youtube.com/vi/{video_id}/0.jpg"

In [51]:
tra = extract_yt_transcript("https://youtu.be/un0SjUnHvvE?si=g4sae7pVwRaXg8aJ")

TranscriptsDisabled: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=g4sae7pVwRaXg8aJ! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!

In [None]:
print(tra)

In [85]:
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
import requests
import re
import os

def get_video_id(youtube_url):
    """
    Extract the video ID from a YouTube URL.
    Args:
        youtube_url (str): The YouTube URL.
    Returns:
        str: The extracted video ID or None if not found.
    """
    pattern = r'(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})'
    match = re.search(pattern, youtube_url)
    return match.group(1) if match else None

def get_video_title(video_id):
    """
    Get the title of the YouTube video.
    Args:
        video_id (str): The YouTube video ID.
    Returns:
        str: The title of the video or "Unknown" if not found.
    """
    url = f"https://www.youtube.com/watch?v={video_id}"
    try:
        response = requests.get(url)
        response.raise_for_status()
        matches = re.findall(r'<title>(.*?)</title>', response.text)
        return matches[0].replace(" - YouTube", "") if matches else "Unknown"
    except requests.RequestException as e:
        print(f"Error fetching video title: {e}")
        return "Unknown"

def download_transcript(video_id):
    """
    Download the transcript and return as a string.
    Args:
        video_id (str): The YouTube video ID.
    Returns:
        str: The transcript text or an empty string if an error occurs.
    """
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcripts = {}

        for transcript in transcript_list:
            transcript_text = transcript.fetch()
            formatter = TextFormatter()
            formatted_text = formatter.format_transcript(transcript_text)

            # Remove timecodes and speaker names
            formatted_text = re.sub(r'\[\d+:\d+:\d+\]', '', formatted_text)
            formatted_text = re.sub(r'<\w+>', '', formatted_text)

            transcripts[transcript.language_code] = formatted_text

        return transcripts
    except Exception as e:
        print(f"Error downloading transcript: {e}")
        return ""

# def main():
#     youtube_url = input("Enter the YouTube video link: ")
#     video_id = get_video_id(youtube_url)

#     if video_id:
#         transcript_text = download_transcript(video_id)
#         if transcript_text:
#             video_title = get_video_title(video_id)
#             file_name = f"{video_id}_{video_title}.txt"
#             file_name = re.sub(r'[\\/*?:"<>|]', '', file_name)  # Remove invalid characters

#             with open(file_name, 'w', encoding='utf-8') as file:
#                 file.write(transcript_text)

#             print(f"Transcript saved to {file_name}")
#         else:
#             print("Unable to download transcript.")
#     else:
#         print("Invalid YouTube URL.")


In [90]:
video_id = get_video_id("htt1ps://youtu.be/eFe8bJ9e_FA?si=A5InRNQMGA7zj2B2")
tra = download_transcript(video_id)
title = get_video_title(video_id)
