In [1]:
import re
from urllib.parse import parse_qs, urlparse

from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound, VideoUnavailable

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Module-level tokenizer and model used by `summarizer` below. Both are loaded
# from the same "facebook/bart-large-cnn" checkpoint so that the token ids the
# tokenizer produces are the ones the model was trained on.
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")

In [None]:
def summarizer(text, max_length, min_length, max_input_tokens=1024):
    """Summarize ``text`` with the module-level ``tokenizer`` and ``model``.

    Args:
        text: Text to summarize. Anything beyond ``max_input_tokens`` tokens
            is dropped by the tokenizer before generation.
        max_length: Upper bound on the generated summary, in tokens. Clamped
            to the model's positional limit.
        min_length: Lower bound on the generated summary, in tokens. Negative
            values are treated as 0; a value above ``max_length`` is replaced
            by ``max_length // 2``.
        max_input_tokens: Maximum number of input tokens kept after
            truncation. Also clamped to the model's positional limit.

    Returns:
        The decoded summary string with special tokens removed.

    Raises:
        ValueError: If ``max_length`` is smaller than 1 after clamping.
    """
    # The model cannot address positions past its position embeddings, so any
    # larger budget could only fail part-way through generation.
    position_limit = getattr(model.config, "max_position_embeddings", 1024)

    max_length = min(max_length, position_limit)
    if max_length < 1:
        raise ValueError(f"max_length must be at least 1, got {max_length}")

    min_length = max(min_length, 0)
    if min_length > max_length:
        min_length = max_length // 2

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=max(1, min(max_input_tokens, position_limit)),
    )

    # Generate summary ids; the attention mask is passed explicitly so that
    # generation does not have to guess which positions are padding.
    summary_ids = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        min_length=min_length,
        max_length=max_length,
        num_beams=4,
        early_stopping=True,
        no_repeat_ngram_size=3
    )

    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


In [None]:
import math
def _summarize_chunk(chunk):
    """Summarize one chunk, falling back to its first 500 characters on error."""
    word_count = len(chunk.split())

    # Length budgets scale with the chunk's word count, are capped at
    # 1024 / 512 and floored at 100 / 20 to avoid overly short summaries.
    max_length = max(min(int(word_count // 1.10), 1024), 100)
    min_length = max(min(int(word_count // 1.33), 512), 20)

    try:
        return summarizer(chunk, max_length=max_length, min_length=min_length)
    except Exception as e:
        print(f"Error summarizing chunk: {e}")
        # Keep the start of the chunk so later passes still see this section.
        return chunk[:500] + "..."


def get_video_summary(video_transcript):
    """Summarize a full transcript by summarizing chunks and then the summaries.

    The transcript is split with ``split_into_chunks`` and every chunk is
    summarized on its own. Because ``summarizer`` truncates its input, the
    joined chunk summaries are re-chunked and re-summarized for as long as
    they exceed one chunk and keep shrinking; only then is the final pass run
    over the combined text. Long transcripts therefore cost several rounds of
    model calls.

    Args:
        video_transcript: Transcript text. ``None``, empty and
            whitespace-only input produce an empty summary.

    Returns:
        The summary string. If the final pass fails, the joined chunk
        summaries are returned instead. Returns "" when there is nothing to
        summarize.
    """
    if not video_transcript or not video_transcript.strip():
        return ""

    # Word budget for a single pass; passed to split_into_chunks explicitly so
    # the two stay in sync (500 is also that function's default).
    max_pass_words = 500

    text = video_transcript
    while True:
        chunk_summaries = [
            _summarize_chunk(chunk)
            for chunk in split_into_chunks(text, max_chunk_size=max_pass_words)
        ]
        if not chunk_summaries:
            return ""
        if len(chunk_summaries) == 1:
            return chunk_summaries[0]

        combined_summary = " ".join(chunk_summaries)
        combined_words = len(combined_summary.split())

        if combined_words <= max_pass_words:
            break
        # Stop reducing when a round barely shrinks the text; this bounds the
        # number of rounds and falls back to the single final pass below.
        if combined_words > 0.9 * len(text.split()):
            break
        text = combined_summary

    # Final summary of the combined chunk summaries
    try:
        return summarizer(
            combined_summary,
            max_length=min(int(combined_words // 1.5), 1024),
            min_length=min(int(combined_words // 2), 512)
        )
    except Exception as e:
        print(f"Error in final summarization: {e}")
        return combined_summary


In [None]:
def split_into_chunks(text, max_chunk_size=500):
    """Split text into chunks of at most ``max_chunk_size`` words.

    Chunks are built from whole sentences where possible. A sentence ends at
    ".", "!" or "?" followed by any whitespace. A sentence that is longer than
    ``max_chunk_size`` words on its own (typical for transcripts without
    punctuation) is cut on word boundaries so that no chunk exceeds the limit.

    Args:
        text: The text to split.
        max_chunk_size: Maximum number of whitespace-separated words per
            chunk. Must be at least 1.

    Returns:
        A list of chunk strings in original order; empty for blank input.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1.
    """
    if max_chunk_size < 1:
        raise ValueError(f"max_chunk_size must be at least 1, got {max_chunk_size}")

    # Split after sentence-ending punctuation without a sentinel character,
    # so text containing any particular symbol is left intact.
    sentences = re.split(r'(?<=[.!?])\s+', text)

    chunks = []
    current_chunk = []
    current_size = 0

    for sentence in sentences:
        words = sentence.split()
        if not words:
            continue

        if len(words) <= max_chunk_size:
            pieces = [(sentence, len(words))]
        else:
            # Oversized sentence: cut it into word-bounded pieces that each fit.
            pieces = []
            for start in range(0, len(words), max_chunk_size):
                piece_words = words[start:start + max_chunk_size]
                pieces.append((' '.join(piece_words), len(piece_words)))

        for piece, piece_size in pieces:
            if current_chunk and current_size + piece_size > max_chunk_size:
                # Save current chunk and start a new one
                chunks.append(' '.join(current_chunk))
                current_chunk = []
                current_size = 0
            current_chunk.append(piece)
            current_size += piece_size

    # Add the last chunk if not empty
    if current_chunk:
        chunks.append(' '.join(current_chunk))

    return chunks

In [None]:
def _join_transcript_text(segments):
  """Join the text of transcript segments into one space-separated string."""
  # Segments are dicts with a 'text' key in the API this file was written
  # against; attribute access covers library releases that return snippet
  # objects instead.
  return " ".join(
      segment['text'] if isinstance(segment, dict) else segment.text
      for segment in segments
  )


def get_video_transcript(video_id):
  """Fetch the transcript of a YouTube video as a single string.

  The default transcript lookup is tried first. If it reports that no
  transcript was found, every transcript track listed for the video is tried
  in turn, translated to English when the track supports translation.

  Args:
    video_id: The YouTube video id (not the full URL).

  Returns:
    The transcript text on success. On failure, a human-readable message that
    starts with "Error:"; this function does not raise for the handled cases,
    so callers must check for that prefix.
  """
  try:
    # Fetch transcript and combine its segments into a single text
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
    return _join_transcript_text(transcript_list)

  # Handle error if transcript for the video is disabled
  except TranscriptsDisabled:
    return "Error: Transcripts are disabled for this video."

  # Handle error if the default lookup found no transcript
  except NoTranscriptFound:
    # Asking for English again would fail the same way, so walk every track
    # the video actually offers, whatever its language.
    try:
        candidates = list(YouTubeTranscriptApi.list_transcripts(video_id))
    except Exception:
        candidates = []

    for candidate in candidates:
        try:
            # Keep the original preference for English when a translation is
            # offered; otherwise use the track in its own language.
            if candidate.is_translatable:
                candidate = candidate.translate('en')
            return _join_transcript_text(candidate.fetch())
        except Exception:
            # This track could not be translated or fetched; try the next one.
            continue

    return "Error: No transcript found for this video in any language."

  # Handle error if video is unavailable
  except VideoUnavailable:
    return "Error: The video is unavailable. It might be private or deleted."

  # Kept for backward compatibility: an IndexError raised inside the lookup is
  # reported as a URL problem. URL parsing itself happens before this
  # function is called.
  except IndexError:
    return "Error: Invalid YouTube URL format."

  # Handle other unexpected errors
  except Exception as e:
    return f"Error: An unexpected error occurred: {str(e)}"

In [None]:
def get_video_id(video_link):
  """Extract the video id from a YouTube link.

  Supported shapes: ``...watch?v=ID`` (with any other query parameters or a
  ``#fragment``), ``youtu.be/ID``, and ``/embed/ID``, ``/shorts/ID``,
  ``/live/ID`` and ``/v/ID`` paths. Links without a scheme and links with
  surrounding whitespace are accepted. As a last resort the text after the
  first ``v=`` is used.

  Args:
    video_link: The link as entered by the user.

  Returns:
    The video id string.

  Raises:
    IndexError: If no video id can be found. IndexError (rather than
      ValueError) is kept because that is what callers of the original
      implementation received for a malformed link.
  """
  link = video_link.strip()

  # urlparse only recognises the host when a scheme is present.
  candidate = link if "://" in link else "https://" + link
  try:
    parsed = urlparse(candidate)
    host = (parsed.hostname or "").lower()
  except ValueError:
    # Malformed authority part; fall through to the textual fallback below.
    parsed, host = None, ""

  video_id = ""
  if parsed is not None:
    path_parts = [part for part in parsed.path.split("/") if part]
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
      video_id = query_ids[0]
    elif host == "youtu.be" and path_parts:
      video_id = path_parts[0]
    elif len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
      video_id = path_parts[1]

  if not video_id:
    # Textual fallback for inputs that are not well-formed URLs; indexing
    # raises IndexError when the link contains no "v=" at all.
    video_id = link.split('v=')[1].split('&')[0]

  if not video_id:
    raise IndexError("no video id found in link")
  return video_id

In [None]:
def main(video_link):
  """Produce a summary (or an error message) for a YouTube video link.

  Args:
    video_link: The YouTube link entered by the user.

  Returns:
    "Summary -> <text>" on success, otherwise a message starting with
    "Error:" describing why no summary could be produced.
  """
  # A malformed link makes get_video_id raise IndexError; report it in the
  # same "Error:" style as transcript failures instead of crashing.
  try:
    video_id = get_video_id(video_link)
  except IndexError:
    return "Error: Invalid YouTube URL format."

  video_transcript = get_video_transcript(video_id)

  # get_video_transcript signals failure through an "Error:" prefix. Check it
  # before summarizing so the model is never run on an error message.
  # Limitation: a real transcript that begins with "Error:" is treated as a
  # failure too.
  if video_transcript.startswith("Error:"):
    return video_transcript

  video_summary = get_video_summary(video_transcript)
  return f"Summary -> {video_summary}"


In [None]:
if __name__ == "__main__":
  # Ask for a link interactively; input() already returns a str.
  video_link = input("Enter the YouTube video link: ")
  # main() returns either the summary or an "Error: ..." message; show it as is.
  print(main(video_link))