Import the libraries

In [None]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest
from transformers import pipeline
from transformers import BartForConditionalGeneration, BartTokenizer

Get the YouTube transcript using the YouTube Transcript API

In [None]:
from urllib.parse import parse_qs, urlparse

from youtube_transcript_api import YouTubeTranscriptApi


def _extract_video_id(url):
    """Return the video ID contained in a YouTube URL.

    Supported forms:
      * ``.../watch?v=<id>`` with any additional query parameters in any order
        (e.g. ``&t=42s``, ``&list=...``)
      * ``youtu.be/<id>`` short links
      * ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths

    Raises:
        ValueError: if no video ID can be found in ``url``.
    """
    url = url.strip()
    # urlparse only fills in the host when a scheme is present.
    if "://" not in url:
        url = "https://" + url
    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]

    if host in ("youtu.be", "www.youtu.be"):
        candidate = path_parts[0] if path_parts else ""
    else:
        query_ids = parse_qs(parsed.query).get("v", [])
        if query_ids:
            candidate = query_ids[0]
        elif len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
            candidate = path_parts[1]
        else:
            candidate = ""

    if not candidate:
        raise ValueError(f"Could not find a YouTube video ID in: {url!r}")
    return candidate


youtube_video = input("Paste YouTube video link")
# Parse the URL properly: splitting on "=" breaks on links that carry extra
# query parameters and on youtu.be short links.
video_id = _extract_video_id(youtube_video)
# Fetch the transcript once; the result is a sequence of caption segments.
transcript = YouTubeTranscriptApi.get_transcript(video_id)

Get text from transcript

In [None]:
# Join the caption segments with a single space so that the last word of one
# segment does not fuse with the first word of the next one.
text = " ".join(segment['text'] for segment in transcript)

Correcting word boundaries (re-segmenting run-together words with wordninja)

In [None]:
import wordninja

# Let wordninja split the transcript into individual words, then rebuild the
# text as one space-separated string.
segmented_words = wordninja.split(text)
text = " ".join(segmented_words)

Tokenizing words

In [None]:
# Parse the whole transcript once with the small English spaCy pipeline.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

# Stop words as a list, plus the raw text of every token in the document.
stopwords = list(STOP_WORDS)
tokens = [tok.text for tok in doc]

Removing stopwords and calculating word frequencies

In [None]:
# Count how often each non-stop-word, non-punctuation token occurs.
#
# Counts are keyed by the *lowercased* token text. The sentence-scoring step
# looks words up with `word.text.lower()`, so keying by the original casing
# would make capitalized words invisible to it and would split the count of a
# word between its case variants.
word_frequencies = {}
for word in doc:
    key = word.text.lower()
    # `punctuation` is a string, so this is a substring test; it filters out
    # single-character punctuation tokens.
    if key in stopwords or key in punctuation:
        continue
    word_frequencies[key] = word_frequencies.get(key, 0) + 1

Get maximum frequency of words

In [None]:
# Highest raw count, used as the normalization divisor. `default=1` keeps this
# cell from raising ValueError when no word survived the filtering (for
# example an empty transcript); dividing by 1 is then a harmless no-op.
max_frequencies = max(word_frequencies.values(), default=1)

Normalizing frequencies

In [None]:
# Divide every count by `max_frequencies`, updating the dictionary in place.
word_frequencies.update(
    {term: count / max_frequencies for term, count in word_frequencies.items()}
)

Tokenizing sentences

In [None]:
# Materialize the sentence spans that spaCy detected in the document.
sentence_tokens = list(doc.sents)

Calculating sentence scores for each sentence

In [None]:
# A sentence's score is the sum of the frequencies of its words that appear in
# `word_frequencies` (looked up by lowercased text). Sentences without any
# such word get no entry at all.
sentence_scores = {}
for sent in sentence_tokens:
    for word in sent:
        term = word.text.lower()
        if term not in word_frequencies:
            continue
        sentence_scores[sent] = sentence_scores.get(sent, 0) + word_frequencies[term]

Keeping the top 50% of sentences, ranked by score

In [None]:
# Keep half of the sentences, but never fewer than one: for a single-sentence
# transcript the plain product is 0.5, which truncates to 0 and would leave
# nothing to summarize.
length = max(1, len(sentence_tokens) * 0.5)

Generating final text to summarize

In [None]:
# Select the int(length) best-scoring sentences; nlargest returns them ordered
# from highest to lowest score.
final_text = nlargest(
    int(length),
    sentence_scores.keys(),
    key=lambda sentence: sentence_scores[sentence],
)

Final text

In [None]:
# Concatenate the text of the selected sentences, separated by single spaces.
final = " ".join(sentence.text for sentence in final_text)

Loading the summarization pipeline used to generate the final summary

In [None]:
# Build a summarization pipeline. No model is named here, so the choice of
# model is left to the transformers library.
summarizer = pipeline(task='summarization')

Breaking down text to smaller chunks

In [None]:
# Summarize `final` in fixed-size character windows and collect one summary
# string per window in `sum_text`.
CHUNK_SIZE = 1000  # characters passed to the summarizer per call

num_iters = len(final) // CHUNK_SIZE
sum_text = []
for i in range(num_iters + 1):
    chunk = final[i * CHUNK_SIZE:(i + 1) * CHUNK_SIZE]
    # The last window is empty whenever len(final) is an exact multiple of
    # CHUNK_SIZE (including 0); there is nothing to summarize in that case.
    if not chunk.strip():
        continue
    out = summarizer(chunk, min_length=1)
    sum_text.append(out[0]['summary_text'])

Printing Summary

In [None]:
# Join the per-chunk summaries with exactly one space between them. Stripping
# each piece first makes the spacing independent of whatever leading or
# trailing whitespace the model emitted, so chunk summaries never run together.
summary = " ".join(piece.strip() for piece in sum_text)
print(summary)

Evaluating the quality of the summary

In [None]:
from rouge_score import rouge_scorer

def evaluate_summary(reference, generated):
    """Score `generated` against `reference` with ROUGE-1, ROUGE-2 and ROUGE-L.

    A RougeScorer with stemming enabled is created on every call. The two
    texts are passed to its `score` method (reference first, generated
    second) and the result of that call is returned unchanged.
    """
    metrics = ['rouge1', 'rouge2', 'rougeL']
    return rouge_scorer.RougeScorer(metrics, use_stemmer=True).score(reference, generated)

# The transcript text is used as the reference and the generated summary is
# scored against it.
reference, generated = text, summary
print(evaluate_summary(reference, generated))


In [None]:
from textstat import flesch_reading_ease

# Compute the Flesch Reading Ease score of the generated summary and report it.
score = flesch_reading_ease(summary)
print(f"Reading Ease Score: {score}")