<a href="https://colab.research.google.com/github/HarsikaVetrivel/PYTHONAIML/blob/main/Youtube_video_summarizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 📦 Install dependencies
!pip install transformers youtube-transcript-api --quiet

# 📚 Imports
import re
from youtube_transcript_api import YouTubeTranscriptApi
from transformers import pipeline

# 📍 Extract video ID from URL
def extract_video_id(url):
    """Return the 11-character YouTube video ID contained in *url*.

    Recognised URL shapes:
      * ``...watch?v=ID`` (``v`` may appear anywhere in the query string)
      * ``youtu.be/ID``
      * ``/embed/ID``, ``/shorts/ID``, ``/live/ID``, ``/watch/ID``,
        ``/v/ID`` and ``/e/ID`` paths

    Args:
        url: The video URL as a string.

    Returns:
        The video ID (exactly 11 characters from ``[0-9A-Za-z_-]``).

    Raises:
        ValueError: If no video ID in one of the recognised positions is found.
    """
    # The ID must follow a known marker and must not be followed by another
    # ID character; otherwise arbitrary path segments (channel IDs, host names
    # that happen to be 11 characters long, ...) would be mistaken for an ID
    # or silently truncated to 11 characters.
    match = re.search(
        r"(?:\bv=|youtu\.be/|/(?:embed|shorts|live|watch|v|e)/)"
        r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])",
        url,
    )
    if match:
        return match.group(1)
    raise ValueError("Invalid YouTube URL")

# 📄 Get transcript from YouTube
def get_transcript(video_id):
    """Fetch the transcript of a YouTube video as one space-joined string.

    Works with both API generations of ``youtube-transcript-api``: the static
    ``YouTubeTranscriptApi.get_transcript`` helper when the installed release
    still provides it, and the instance-based ``fetch`` method otherwise.
    The notebook installs the package unpinned, so either may be present.

    Args:
        video_id: The YouTube video ID.

    Returns:
        The transcript text, or ``""`` if it could not be fetched for any
        reason (the error is printed, not raised).
    """
    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older API: list of dicts, each carrying a 'text' key.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
            pieces = [entry['text'] for entry in entries]
        else:
            # Newer API: iterable of snippet objects with a ``text`` attribute.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            pieces = [snippet.text for snippet in fetched]
        return " ".join(pieces)
    except Exception as e:
        # Deliberate best-effort boundary: callers treat "" as "no transcript".
        print("❌ Could not fetch transcript:", e)
        return ""

# 🧠 Summarize using Hugging Face
def summarize_text(text, chunk_size=1024):
    """Summarize *text* chunk by chunk with a Hugging Face summarization model.

    The text is split on whitespace into chunks of at most *chunk_size*
    characters, each chunk is summarized independently, and the per-chunk
    summaries are joined with newlines.

    Args:
        text: The text to summarize.
        chunk_size: Maximum number of characters per chunk (default 1024).

    Returns:
        The combined summary, or ``""`` when *text* is empty or contains only
        whitespace (the model is not loaded in that case).
    """
    import textwrap

    # Split on word boundaries so no word is cut in half between two chunks;
    # words longer than chunk_size are still broken so the limit always holds.
    chunks = textwrap.wrap(text, width=chunk_size, break_on_hyphens=False)
    if not chunks:
        # Nothing to summarize: skip the expensive model load entirely.
        return ""

    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

    pieces = []
    total = len(chunks)
    for idx, chunk in enumerate(chunks, start=1):
        print(f"🧩 Summarizing chunk {idx}/{total}...")
        # truncation=True guards against a chunk that tokenizes to more
        # tokens than the model accepts (the chunk limit is in characters).
        result = summarizer(
            chunk,
            max_length=120,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        pieces.append(result[0]['summary_text'])
    return "\n".join(pieces).strip()

# 🔁 Full pipeline
def summarize_video(video_url):
    """Run the whole flow for one video: URL -> transcript -> summary.

    Prints progress messages and a preview of the transcript along the way.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summary text, or a warning message when the transcript is
        unavailable or the summary comes back empty.
    """
    print("📥 Fetching transcript...")
    transcript = get_transcript(extract_video_id(video_url))

    # A blank transcript means the fetch failed or there are no captions.
    if transcript.strip() == "":
        return "⚠️ Transcript not available for this video."

    print("\n📝 Transcript preview (first 500 characters):")
    preview = transcript[:500]
    print(f"{preview}...\n")

    print("🧠 Summarizing...")
    result = summarize_text(transcript)
    if result:
        return result
    return "⚠️ Failed to generate summary."

# ▶️ Video to summarize — put your own URL here
video_url = "https://www.youtube.com/watch?v=gm54xjD8_Kk"

# 🚀 Fetch the transcript and build the summary
summary = summarize_video(video_url)

# 📝 Print the heading, then the summary on its own line
print("\n📄 Summary:\n", summary, sep="\n")


📥 Fetching transcript...

📝 Transcript preview (first 500 characters):
okay welcome all of you to session 14 we have two more classes to go so let me see what I let me show you what I want to cover today these are the topics I'm sure there are all of them are important topics in processor architecture world so I want to introduce these techniques and not to the very detailed level of how it is implemented but at least I will show you um the idea behind it so that later on you can read it and learn about all these things on your own okay so register renaming let me ...

🧠 Summarizing...


Device set to use cpu


🧩 Summarizing chunk 1/78...
🧩 Summarizing chunk 2/78...
🧩 Summarizing chunk 3/78...
🧩 Summarizing chunk 4/78...
🧩 Summarizing chunk 5/78...
🧩 Summarizing chunk 6/78...
🧩 Summarizing chunk 7/78...
🧩 Summarizing chunk 8/78...
🧩 Summarizing chunk 9/78...
🧩 Summarizing chunk 10/78...
🧩 Summarizing chunk 11/78...
🧩 Summarizing chunk 12/78...
🧩 Summarizing chunk 13/78...
🧩 Summarizing chunk 14/78...
🧩 Summarizing chunk 15/78...
🧩 Summarizing chunk 16/78...
🧩 Summarizing chunk 17/78...
🧩 Summarizing chunk 18/78...
🧩 Summarizing chunk 19/78...
🧩 Summarizing chunk 20/78...
🧩 Summarizing chunk 21/78...
🧩 Summarizing chunk 22/78...
🧩 Summarizing chunk 23/78...
🧩 Summarizing chunk 24/78...
🧩 Summarizing chunk 25/78...
🧩 Summarizing chunk 26/78...
🧩 Summarizing chunk 27/78...
🧩 Summarizing chunk 28/78...
🧩 Summarizing chunk 29/78...
🧩 Summarizing chunk 30/78...
🧩 Summarizing chunk 31/78...
