In [32]:
import bs4 as bs
import urllib.request
import re
import nltk
import heapq
from keybert import KeyBERT

In [33]:
class Eng:
    """Frequency-based extractive summarizer and KeyBERT keyword extractor.

    The input is a list of English strings (for example headlines). They are
    joined with single spaces into one text, which can then be split into
    sentences, scored by word frequency, summarized, and mined for keywords.

    Most methods cache their latest result on the instance
    (``sentence_result``, ``sentence_list``, ``formatted_article_text``,
    ``word_frequencies``, ``sentence_scores``, ``summary_sentence_list``).
    """

    def __init__(self, data: list):
        """Store ``data`` and build the joined text from it.

        Args:
            data: List of strings to analyse.
        """
        self.data_value = data
        self.sentence = self.get_sentence()

    def get_sentence(self):
        """Return the input strings joined into one space-separated text."""
        self.sentence_result = self.make_sentence(self.data_value)
        return self.sentence_result

    def make_sentence(self, list_word):
        """Join ``list_word`` with single spaces and return the result."""
        self.sentence_value = ' '.join(list_word)
        return self.sentence_value

    def tokenize(self):
        """Split the joined text into sentences with ``nltk.sent_tokenize``."""
        self.sentence_list = nltk.sent_tokenize(self.sentence)
        return self.sentence_list

    def create_formatted_article(self):
        """Return the joined text with every non-letter replaced by a space.

        Runs of whitespace are collapsed to a single space afterwards.
        """
        self.get_sentence()
        # Removing special characters and digits
        letters_only = re.sub('[^a-zA-Z]', ' ', self.sentence_result)
        self.formatted_article_text = re.sub(r'\s+', ' ', letters_only)
        return self.formatted_article_text

    def find_word_frequencies(self):
        """Count how often each non-stop-word occurs in the cleaned text.

        Words are lower-cased before counting so that the keys match both
        NLTK's lower-case stop-word list and the lower-cased tokens that
        ``find_sentence_scores`` looks up.

        Returns:
            dict mapping lower-case word -> occurrence count.
        """
        self.tokenize()
        self.create_formatted_article()
        # A set gives constant-time membership tests inside the loop.
        stopwords = set(nltk.corpus.stopwords.words('english'))
        self.word_frequencies = {}
        for word in nltk.word_tokenize(self.formatted_article_text.lower()):
            if word not in stopwords:
                self.word_frequencies[word] = self.word_frequencies.get(word, 0) + 1
        return self.word_frequencies

    def find_maximum_frequncy(self):
        """Recompute word frequencies and scale them by the largest count.

        Returns:
            dict mapping word -> count / max count. The dict is empty when
            the text contains no countable words.
        """
        self.find_word_frequencies()
        if not self.word_frequencies:
            # max() of an empty sequence would raise ValueError.
            return self.word_frequencies
        maximum_frequncy = max(self.word_frequencies.values())
        for word in self.word_frequencies:
            self.word_frequencies[word] = self.word_frequencies[word] / maximum_frequncy
        return self.word_frequencies

    def find_sentence_scores(self, max_words=30):
        """Score each sentence by summing the frequencies of its words.

        Args:
            max_words: Sentences with this many or more space-separated
                words are skipped.

        Returns:
            dict mapping sentence -> score. Sentences that contain no
            counted word are absent from the dict.
        """
        # find_word_frequencies() also refreshes self.sentence_list.
        self.find_word_frequencies()
        self.sentence_scores = {}
        for sent in self.sentence_list:
            # The length check depends only on the sentence, so do it once.
            if len(sent.split(' ')) >= max_words:
                continue
            for word in nltk.word_tokenize(sent.lower()):
                if word in self.word_frequencies:
                    self.sentence_scores[sent] = (
                        self.sentence_scores.get(sent, 0) + self.word_frequencies[word]
                    )
        return self.sentence_scores

    def summary_sentences(self, top_n=5, max_words=30):
        """Return the ``top_n`` highest-scoring sentences, best first.

        Args:
            top_n: Maximum number of sentences to return.
            max_words: Passed to ``find_sentence_scores``.

        Returns:
            list of sentences, also stored in ``self.summary_sentence_list``.
        """
        self.find_sentence_scores(max_words)
        # Stored under a different name than the method: assigning to
        # self.summary_sentences would shadow the method on this instance
        # and make every later call fail with "'list' object is not callable".
        self.summary_sentence_list = heapq.nlargest(
            top_n, self.sentence_scores, key=self.sentence_scores.get
        )
        return self.summary_sentence_list

    def find_keyword(self):
        """Extract single-word keywords from the joined text with KeyBERT.

        Returns:
            The result of ``KeyBERT.extract_keywords`` called with
            ``keyphrase_ngram_range=(1, 1)`` and ``stop_words=None``.
        """
        self.get_sentence()
        # Build the model once per instance instead of on every call.
        kw_model = getattr(self, '_kw_model', None)
        if kw_model is None:
            kw_model = self._kw_model = KeyBERT()
        return kw_model.extract_keywords(
            self.sentence_result, keyphrase_ngram_range=(1, 1), stop_words=None
        )

In [34]:
# Sample input: one news headline per list entry.
eng = Eng(
    [
        "More classified material found at Biden's home",
        'Top secret documents reportedly found in Biden cache',
        'Four unanswered questions about the Biden documents',
        'How Biden and Trump secret files cases compare',
        'Delay in telling public about files may haunt Biden',
        "Republicans want to know who visited Biden's homes",
        'The problem with politicians and classified files',
        'Biden under investigation',
        'How much trouble is the president in over classified documents found at his home?',
        'Who is the special counsel in the Biden probe?',
        'Special counsel appointed to investigate Biden files',
        'Second batch of classified Biden documents found',
        'Jill Biden has surgery to remove cancerous skin lesions',
        "Biden 'surprised' about classified files discovery",
        'Biden visits US-Mexico border in trip to Texas',
        'Western allies to send fighting vehicles to Ukraine',
        'What will change with Republicans controlling the House',
        "What's actually in the $1.7tn US spending bill?",
        "Zelensky trip shows US doesn't want peace - Russia",
        'Ukraine is alive and kicking, Zelensky tells US',
    ]
)

In [35]:
# Show the input headlines joined into one space-separated string.
eng.get_sentence()


"More classified material found at Biden's home Top secret documents reportedly found in Biden cache Four unanswered questions about the Biden documents How Biden and Trump secret files cases compare Delay in telling public about files may haunt Biden Republicans want to know who visited Biden's homes The problem with politicians and classified files Biden under investigation How much trouble is the president in over classified documents found at his home? Who is the special counsel in the Biden probe? Special counsel appointed to investigate Biden files Second batch of classified Biden documents found Jill Biden has surgery to remove cancerous skin lesions Biden 'surprised' about classified files discovery Biden visits US-Mexico border in trip to Texas Western allies to send fighting vehicles to Ukraine What will change with Republicans controlling the House What's actually in the $1.7tn US spending bill? Zelensky trip shows US doesn't want peace - Russia Ukraine is alive and kicking,

In [36]:
# Score the tokenized sentences by word frequency and show the highest-scoring ones (at most five).
eng.summary_sentences()

["Zelensky trip shows US doesn't want peace - Russia Ukraine is alive and kicking, Zelensky tells US",
 'Who is the special counsel in the Biden probe?']

In [37]:
# Extract single-word keywords from the joined text with KeyBERT; each result is a (keyword, score) pair.
eng.find_keyword()

[('biden', 0.4392),
 ('documents', 0.3501),
 ('classified', 0.3172),
 ('politicians', 0.2588),
 ('files', 0.2574)]