# Auto-Generate a Summary from Long Youtube Videos Using AI


*First we import the Whisper model, pytube to download YouTube videos, the transformers library to create the summary, `List` from typing for type hints, and logging to record any errors that occur while creating the summary.<br>
The overall approach is to first download the video's audio using the pytube library, then convert the audio to text using the Whisper model, and finally summarize that text using the BART model.

In [48]:
import whisper
from pytube import YouTube
from transformers import pipeline
from typing import List
import logging
logging.basicConfig(filename='demo.log', encoding='utf-8', level=logging.ERROR)

*Provide the URL of the YouTube video and the base file name to use for the files saved on your local system.

In [None]:
# Address of the YouTube video to summarize.
URL = "https://www.youtube.com/watch?v=p0Tfs7VNp7s"
# Base name (no extension) used for the downloaded audio, transcript and summary files.
VIDEO_NAME = "demo"

*Now we download the audio track of the YouTube video. We use the YouTube class from the pytube library and save the audio under the file name given above with an .mp3 extension.

In [38]:

def download_audio_from_youtube(url: str, video_name: str) -> str:
    """Download the first audio-only stream of a YouTube video.

    Args:
        url: Address of the YouTube video.
        video_name: Base name (without extension) for the saved file.

    Returns:
        The name of the saved file, ``video_name + ".mp3"``.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    audio_stream = YouTube(url).streams.filter(only_audio=True).first()
    if audio_stream is None:
        # An empty query yields None; fail with a clear message instead of an
        # AttributeError on the download call below.
        raise ValueError(f"No audio-only stream available for {url}")

    # NOTE(review): the ".mp3" name is applied as-is; no audio conversion
    # happens in this function.
    filename = video_name + ".mp3"
    audio_stream.download(filename=filename)
    return filename

*Now we load the Whisper model from the whisper library and convert the whole .mp3 audio file into text using Whisper's transcribe function. The transcribed text is then saved to a .txt file.

In [None]:
def load_whisper_model(model_name: str = "medium"):
    """Return the Whisper model identified by ``model_name``.

    Args:
        model_name: Name handed to ``whisper.load_model``; defaults to
            ``"medium"``.

    Returns:
        Whatever ``whisper.load_model`` returns for that name.
    """
    loaded_model = whisper.load_model(model_name)
    return loaded_model


def transcribe_audio_to_text(model, audio_path: str, language: str = "English"):
    """Run ``model.transcribe`` on an audio file and return its result.

    Args:
        model: Object exposing a ``transcribe`` method (the loaded Whisper
            model in this file).
        audio_path: Path of the audio file to transcribe.
        language: Language passed through to ``transcribe``.

    Returns:
        The value returned by ``model.transcribe``.
    """
    # fp16 is always disabled; only the language is caller-controlled.
    transcribe_options = {"fp16": False, "language": language}
    return model.transcribe(audio_path, **transcribe_options)


def save_text_to_file(text: str, file_name: str):
    """Save the transcribed text to a file.

    The file is created or truncated and written as UTF-8, matching the
    encoding ``read_file`` uses when loading the transcript back.

    Args:
        text: Text to write.
        file_name: Full path of the destination file, including extension.

    Returns:
        None. I/O failures are logged at ERROR level and not re-raised.
    """
    try:
        with open(file_name, "w", encoding="utf8") as file:
            file.write(text)
    except OSError as e:
        # OSError covers IOError, FileNotFoundError and PermissionError.
        # Logged at ERROR so the message passes the ERROR threshold set in
        # logging.basicConfig; a DEBUG record would be dropped.
        logging.error(f"Error in file operation: {e}")


def get_text(url: str, video_name: str) -> None:
    """Download a video's audio, transcribe it, and save the transcript.

    The Whisper model is loaded first, then the audio is downloaded and
    transcribed; the ``"text"`` entry of the result is written to
    ``video_name + ".txt"``.

    Args:
        url: Address of the YouTube video.
        video_name: Base name used for both the audio and transcript files.
    """
    whisper_model = load_whisper_model()
    transcription = transcribe_audio_to_text(
        whisper_model,
        download_audio_from_youtube(url, video_name),
    )
    transcript_path = video_name + ".txt"
    save_text_to_file(transcription["text"], transcript_path)


# Run the download -> transcribe -> save pipeline for the configured video;
# the transcript is written to VIDEO_NAME + ".txt".
get_text(url=URL, video_name=VIDEO_NAME)

*The full text will be large, so we split it into sentences with the nltk library and group them into smaller chunks.

In [None]:
import nltk
# Fetch the 'punkt' resource up front.
# NOTE(review): presumably required by nltk.sent_tokenize, which
# split_text_into_chunks calls — confirm for the installed NLTK version.
nltk.download('punkt')


def read_file(file_name: str) -> str:
    """Return the contents of ``file_name + ".txt"`` decoded as UTF-8.

    Args:
        file_name: Path of the file without the ``.txt`` extension.

    Returns:
        The file's text, or an empty string when the file is missing or any
        other error occurs while reading it (the error is logged).
    """
    try:
        with open(file_name + ".txt", mode="r", encoding="utf8") as handle:
            contents = handle.read()
    except FileNotFoundError as err:
        logging.error(f"{err}: File '{file_name}.txt' not found.")
        return ""
    except Exception as err:
        logging.error(f"Error reading file: {err}")
        return ""
    return contents


def split_text_into_chunks(document: str, max_tokens: int) -> List[str]:
    """Group the sentences of ``document`` into space-joined chunks.

    Sentences come from ``nltk.sent_tokenize`` and are packed greedily: a
    sentence joins the current chunk while the running length stays below
    ``max_tokens``, otherwise it starts a new chunk.

    Note that, despite the parameter name, length is measured in characters
    (``len(sentence)``), and the spaces inserted when joining are not
    counted. A single sentence longer than ``max_tokens`` is kept whole as
    its own chunk.

    Args:
        document: Text to split.
        max_tokens: Size budget per chunk, in characters.

    Returns:
        The list of non-empty chunks, or an empty list when ``document`` is
        empty or an error occurs (the error is logged).
    """
    if not document:
        return []

    chunks, current_chunk, current_length = [], [], 0

    try:
        for sentence in nltk.sent_tokenize(document):
            sentence_length = len(sentence)

            if current_length + sentence_length < max_tokens:
                current_chunk.append(sentence)
                current_length += sentence_length
                continue

            # Flush only a non-empty chunk: when the very first sentence
            # already reaches the budget there is nothing to flush, and an
            # empty string must not be emitted as a chunk.
            if current_chunk:
                chunks.append(" ".join(current_chunk))
            current_chunk, current_length = [sentence], sentence_length

        if current_chunk:
            chunks.append(" ".join(current_chunk))

        return chunks
    except Exception as e:
        logging.error(f"Error splitting text into chunks: {e}")
        return []


# Load the saved transcript and, if it is non-empty, split it into chunks
# for the summarization step.
long_text = read_file(VIDEO_NAME)
if not long_text:
    logging.error("Error: Unable to process the text.")
else:
    text_chunks = split_text_into_chunks(long_text, max_tokens=4000)
    logging.info(f"Text chunks: {text_chunks}")

*Here we set the parameters for the BART model. We create a summarization pipeline that is applied to each chunk of text, giving us a summary of the whole text file in short form.

In [42]:
from typing import Callable, List, Dict
# Keyword arguments forwarded to the summarizer for every chunk
# (get_summary_bart unpacks this dict into each summarizer call).
# NOTE(review): the per-key meanings below are assumed from the key names and
# the transformers summarization pipeline — confirm against the library docs.
bart_params = {
    "max_length": 124,  # presumably the upper bound on each summary's length
    "min_length": 30,  # presumably the lower bound on each summary's length
    "do_sample": False,  # presumably disables sampling during generation
    "truncation": True,  # presumably truncates inputs that exceed the model's limit
    "repetition_penalty": 10.0,  # presumably discourages repeated tokens; value looks high — confirm
}


def create_summarizer(model: str) -> Callable:
    """Build a ``"summarization"`` pipeline for the given model.

    Args:
        model: Model identifier passed to ``pipeline`` as ``model``.

    Returns:
        The object returned by ``pipeline``, used as a callable summarizer.
    """
    return pipeline("summarization", model=model)


def get_summary_bart(list_chunks: List[str], summarizer: Callable, summarization_params: Dict[str, int]) -> str:
    """Summarize each chunk and join the summaries with single spaces.

    Args:
        list_chunks: Text chunks to summarize, in order.
        summarizer: Callable invoked as ``summarizer(chunk, **params)``; the
            ``"summary_text"`` entry of its first result is used.
        summarization_params: Keyword arguments unpacked into every
            summarizer call.

    Returns:
        The joined summaries, or an empty string if any step raises (the
        error is logged and all partial results are discarded).
    """
    try:
        pieces = []
        for chunk in list_chunks:
            outputs = summarizer(chunk, **summarization_params)
            pieces.append(outputs[0]["summary_text"])
        return " ".join(pieces)
    except Exception as err:
        logging.error(f"Error generating summaries: {err}")
        return ""

*Now we save the whole summary of the YouTube video to a file using the save_summary_to_file function.

In [43]:
def save_summary_to_file(summary: str, file_name: str) -> None:
    """Append ``summary`` to the file ``<file_name>.txt``.

    The file is opened in append mode, so repeated calls with the same name
    accumulate text rather than overwrite it.

    Args:
        summary: Text to append.
        file_name: Destination path without the ``.txt`` extension.

    Returns:
        None. Any exception is logged at ERROR level and not re-raised.
    """
    try:
        destination = f"{file_name}.txt"
        with open(destination, mode="a") as out:
            out.write(summary)
    except Exception as err:
        logging.error(f"Error saving summary to file: {err}")

*For this video, text_chunks contains 4 chunks.

In [44]:
# Number of chunks produced from the transcript (4 in the recorded run).
len(text_chunks)

4

*Now we call the functions that create the summarizer, generate the summary, and save it to a file. If the summary of the video is longer than 5000 characters, we split the summary into chunks and apply the summarization function again to those smaller chunks.

In [None]:
# Assume text_chunks is already defined and contains the chunks of text from the previous steps
# Build the BART summarization pipeline, summarize every chunk, and save the
# joined result to summary_<VIDEO_NAME>.txt.
summarizer = create_summarizer("facebook/bart-large-cnn")
summary = get_summary_bart(text_chunks, summarizer, bart_params)
save_summary_to_file(summary, f"summary_{VIDEO_NAME}")

if len(summary) > 5000:
    # If the summary is too long (more than 5000 characters), re-chunk it and
    # apply the summarization function a second time.
    # NOTE(review): this rebinds text_chunks to chunks of the summary, so
    # re-running this cell would summarize the summary rather than the
    # transcript — confirm intent.
    text_chunks = split_text_into_chunks(summary, max_tokens=1000)
    short_summary = get_summary_bart(text_chunks, summarizer, bart_params)
    save_summary_to_file(short_summary, f"short_summary_{VIDEO_NAME}")
    # NOTE(review): emitted only on this branch, and at INFO level, which is
    # below the ERROR threshold passed to logging.basicConfig.
    logging.info("Summary saved to file.")

*We can also read the saved summary back from the file named summary_demo.txt and print it.

In [None]:
file_path = 'summary_demo.txt'  # Replace with the actual file path

# Read the saved summary. The with-block closes the file even if reading
# raises, which the previous manual open()/close() pair did not guarantee.
with open(file_path, 'r') as file:
    file_contents = file.read()

# Now you can work with the contents of the file stored in the variable 'file_contents'
print(file_contents)

<a href="https://towardsdatascience.com/how-to-auto-generate-a-summary-from-long-youtube-videos-using-ai-a2a542b6698d">For further reading and support, you can use this article.<br>Thank you</a>
