Dependency install

In [None]:
!pip install pytube
!pip install sse_starlette
!pip install youtube_transcript_api
!pip install -q -U google-generativeai

API key setup:

In [None]:
import pathlib
import textwrap

import google.generativeai as genai


# Used to securely store your API key
from google.colab import userdata

# Or use `os.getenv('GOOGLE_API_KEY')` to fetch an environment variable.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

genai.configure(api_key=GOOGLE_API_KEY)

model = genai.GenerativeModel('gemini-1.5-flash')

# summarize_video() sends its prompt through a global `chat` session; without
# this definition the first call fails with NameError. The session starts with
# an empty history.
chat = model.start_chat(history=[])



Program Logic:

In [None]:
from pytube import YouTube
from pytube.helpers import RegexMatchError
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled
import pathlib
import textwrap

import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown


def to_markdown(text):
  """Wrap ``text`` in an IPython ``Markdown`` object rendered as a block quote.

  Every '•' character is first replaced with '  *', then every line
  (including blank ones) is prefixed with '> '.
  """
  bulleted = text.replace('•', '  *')
  # The predicate always returns True so that empty lines are prefixed too.
  quoted = textwrap.indent(bulleted, '> ', predicate=lambda _line: True)
  return Markdown(quoted)

# In-memory cache for transcripts, keyed by video ID with the transcript text
# as the value. It is written by cache_video_transcript() and read by
# get_cached_video_transcript(); nothing in the code shown here removes entries.
transcript_cache = {}


def get_summary_prompt(transcript: str):
    """Build the prompt that asks the model for a bullet-point summary.

    Args:
        transcript: Transcript text of the video; appended verbatim after
            the ``Transcript: `` label.

    Returns:
        The prompt string. It begins with a newline, followed by the fixed
        instructions and finally the transcript.
    """
    prompt_lines = (
        "",  # the prompt starts with a leading newline
        "You are the YouTube Summarizer that specializes in summarizing videos shorter than 20",
        "minutes and responding to questions about the video.",
        "",
        "What follows is the transcript of a YouTube video. Please provide a summarization of the video",
        "using bullet points.",
        "",
        f"Transcript: {transcript}",
    )
    return "\n".join(prompt_lines)


def get_video_object(link: str):
    """Create a pytube ``YouTube`` object for ``link``.

    Returns:
        The ``YouTube`` object, or ``None`` when constructing it raises
        ``RegexMatchError``.
    """
    try:
        video = YouTube(link)
    except RegexMatchError:
        video = None
    return video


def compute_transcript_text(video_id: str):
    """Fetch the transcript for ``video_id`` and return it as plain text.

    Works with both youtube-transcript-api generations: the 1.x instance API
    (``YouTubeTranscriptApi().fetch``) and the legacy static
    ``YouTubeTranscriptApi.get_transcript``, which newer releases deprecated
    and later removed.

    Args:
        video_id: The YouTube video ID.

    Returns:
        The text of every transcript entry, joined with newlines.

    Raises:
        Whatever the transcript library raises when retrieval fails
        (e.g. ``TranscriptsDisabled``); nothing is caught here.
    """
    if hasattr(YouTubeTranscriptApi, "fetch"):
        # 1.x API: fetch() yields snippet objects exposing a `.text` attribute.
        fetched = YouTubeTranscriptApi().fetch(video_id)
        lines = (snippet.text for snippet in fetched)
    else:
        # Legacy API: a list of dicts with a "text" key.
        raw_transcript = YouTubeTranscriptApi.get_transcript(video_id)
        lines = (item["text"] for item in raw_transcript)
    return "\n".join(lines)


def get_cached_video_transcript(video_id: str):
    """Look up ``video_id`` in ``transcript_cache``.

    Returns:
        The cached transcript, or ``None`` when the ID is not cached.
    """
    try:
        return transcript_cache[video_id]
    except KeyError:
        return None


def cache_video_transcript(video_id: str, transcript: str):
    """Store ``transcript`` in ``transcript_cache`` under ``video_id``.

    Any transcript already stored for that ID is overwritten.
    """
    transcript_cache.update({video_id: transcript})


def get_video_transcript(video: YouTube):
    """Return the transcript text for ``video``, consulting the cache first.

    On a cache miss the transcript is fetched with
    ``compute_transcript_text`` and stored before being returned.

    Args:
        video: Object exposing the video's ID as ``video.video_id``.

    Returns:
        The transcript text.

    Raises:
        Any exception raised by ``compute_transcript_text``; nothing is
        caught here.
    """
    video_id = video.video_id
    cached_transcript = get_cached_video_transcript(video_id)
    # Compare against None rather than relying on truthiness: an empty
    # transcript is still a valid cache hit and must not be re-fetched.
    if cached_transcript is not None:
        return cached_transcript

    transcript = compute_transcript_text(video_id)
    cache_video_transcript(video_id, transcript)
    return transcript


def summarize_video(link: str):
    """Summarize the YouTube video at ``link`` through the ``chat`` session.

    Relies on a ``chat`` object being available in the global scope; the
    summary prompt is sent with ``chat.send_message``.

    Args:
        link: URL of the video to summarize.

    Returns:
        On success, the object returned by ``chat.send_message``. On failure
        (invalid URL, or no transcript could be retrieved) a plain ``str``
        holding an error message. Callers must handle both shapes.
    """
    # Imported locally so this function does not depend on edits to the
    # notebook's import cell. This is the library's base class for
    # transcript-retrieval failures.
    from youtube_transcript_api._errors import CouldNotRetrieveTranscript

    video = get_video_object(link)
    if not video:
        return "Invalid video URL."

    try:
        transcript = get_video_transcript(video)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video. Please provide a new video URL."
    except CouldNotRetrieveTranscript:
        # Covers the remaining retrieval failures (e.g. no transcript in the
        # requested language, video unavailable) instead of crashing the caller.
        return "Error: Could not retrieve a transcript for this video. Please provide a new video URL."

    summary_prompt = get_summary_prompt(transcript)
    return chat.send_message(summary_prompt)

Runtime loop:

In [None]:
# Read video URLs from standard input and display a summary for each one.
# A blank line ends the session.
while True:
  query = input().strip()
  if not query:
    break
  summary = summarize_video(query)
  # summarize_video returns a plain string for error messages and an object
  # with a `.text` attribute on success; reading `.text` unconditionally
  # raised AttributeError for every error message.
  summary_text = summary if isinstance(summary, str) else summary.text
  display(to_markdown(summary_text))

