In [1]:
from pytube import YouTube

try:
    yt = YouTube("https://www.youtube.com/watch?v=X7Zd4VyUgL0")
    print(f"Video Title: {yt.title}")
    print(f"Video Length: {yt.length} seconds")
except Exception as e:
    print("Error fetching video:", e)


Error fetching video: Exception while accessing title of https://youtube.com/watch?v=X7Zd4VyUgL0. Please file a bug report at https://github.com/pytube/pytube


In [2]:
from yt_dlp import YoutubeDL
from langchain.document_loaders import YoutubeLoader

def get_video_info(video_url):
    """Fetch YouTube video information using yt-dlp."""
    ydl_opts = {"quiet": True, "format": "bestaudio/best"}
    with YoutubeDL(ydl_opts) as ydl:
        try:
            info = ydl.extract_info(video_url, download=False)
            title = info.get("title", "Unknown Title")
            duration = info.get("duration", "Unknown Duration")
            description = info.get("description", "")
            print(f"Title: {title}\nDuration: {duration} seconds")
            return {"title": title, "duration": duration, "description": description}
        except Exception as e:
            print(f"Error fetching video info: {e}")
            return {}

# Video URL
generic_url = "https://www.youtube.com/watch?v=X7Zd4VyUgL0"

# Fetch and print video metadata
video_info = get_video_info(generic_url)

# Use LangChain YoutubeLoader to extract transcript
try:
    loader = YoutubeLoader.from_youtube_url(generic_url, add_video_info=True)
    docs = loader.load()
    print("Documents:", docs)
except Exception as e:
    print(f"Error loading transcript: {e}")


Title: AI vs ML vs DL vs Generative Ai
Duration: 960 seconds
Error loading transcript: Exception while accessing title of https://youtube.com/watch?v=X7Zd4VyUgL0. Please file a bug report at https://github.com/pytube/pytube


In [3]:
from yt_dlp import YoutubeDL
from youtube_transcript_api import YouTubeTranscriptApi

def get_video_metadata(url):
    """Fetch video metadata using yt-dlp."""
    ydl_opts = {"quiet": True}
    with YoutubeDL(ydl_opts) as ydl:
        try:
            info = ydl.extract_info(url, download=False)
            title = info.get("title", "Unknown Title")
            duration = info.get("duration", "Unknown Duration")
            print(f"Title: {title}, Duration: {duration} seconds")
            return info
        except Exception as e:
            print(f"Error fetching metadata: {e}")
            return None

def get_transcript(video_id):
    """Fetch video transcript using youtube_transcript_api."""
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return transcript
    except Exception as e:
        print(f"Transcript fetch error: {e}")
        return None

# Your YouTube video URL
url = "https://www.youtube.com/watch?v=X7Zd4VyUgL0"

# Extract video metadata
metadata = get_video_metadata(url)

# Extract video ID and fetch transcript
if metadata:
    video_id = metadata["id"]
    transcript = get_transcript(video_id)
    print("Transcript:", transcript)




Title: AI vs ML vs DL vs Generative Ai, Duration: 960 seconds
Transcript: [{'text': 'hello all my name is Kush naak and', 'start': 0.52, 'duration': 4.04}, {'text': 'welcome to my YouTube channel so guys', 'start': 2.44, 'duration': 3.76}, {'text': 'three to four years back you know I had', 'start': 4.56, 'duration': 4.52}, {'text': 'created a video uh to make you', 'start': 6.2, 'duration': 4.359}, {'text': 'understand the differences between AI', 'start': 9.08, 'duration': 4.479}, {'text': 'versus ml versus DL versus data science', 'start': 10.559, 'duration': 5.24}, {'text': 'and uh till now probably that is the', 'start': 13.559, 'duration': 4.201}, {'text': 'video and that was a 9 Minutes video', 'start': 15.799, 'duration': 5.081}, {'text': 'where I clearly differentiated between', 'start': 17.76, 'duration': 5.48}, {'text': 'all these terms right specifically that', 'start': 20.88, 'duration': 4.6}, {'text': 'you see over here and right now that is', 'start': 23.24, 'duration': 