In [2]:
import bs4 as bs
import urllib.request
import re
import nltk
import heapq
from keybert import KeyBERT

In [3]:
class Eng:
    def __init__(self,data:list):
        self.data_value = data
        self.sentence = self.get_sentence()
        
    def get_sentence(self):
        self.sentence_result = self.make_sentence(self.data_value)
        return self.sentence_result
    def make_sentence(self,list_word):
        self.sentence_value = ' '.join(list_word)
        return self.sentence_value
    def tokenize(self):
        self.sentence_list = nltk.sent_tokenize(self.sentence)
        return self.sentence_list
    def create_formatted_article(self):
        # Removing special characters and digits
        self.get_sentence()
        self.formatted_article_text = re.sub('[^a-zA-Z]', ' ', self.sentence_result )
        self.formatted_article_text = re.sub(r'\s+', ' ', self.formatted_article_text)
        return self.formatted_article_text
    def find_word_frequencies(self):
        self.tokenize()
        self.create_formatted_article()
        stopwords = nltk.corpus.stopwords.words('english')
        self.word_frequencies = {}
        for word in nltk.word_tokenize(self.formatted_article_text):
            if word not in stopwords:
                if word not in self.word_frequencies.keys():
                    self.word_frequencies[word] = 1
                else:
                    self.word_frequencies[word] += 1
        return self.word_frequencies
    def find_maximum_frequncy(self):
        self.find_word_frequencies()
        maximum_frequncy = max(self.word_frequencies.values())
        for word in self.word_frequencies.keys():
            self.word_frequencies[word] = (self.word_frequencies[word]/maximum_frequncy)
    def find_sentence_scores(self):
        self.sentence_scores = {}
        self.tokenize()
        self.find_word_frequencies()
        for sent in self.sentence_list:
            for word in nltk.word_tokenize(sent.lower()):
                if word in self.word_frequencies.keys():
                    if len(sent.split(' ')) < 30:
                        if sent not in self.sentence_scores.keys():
                            self.sentence_scores[sent] = self.word_frequencies[word]
                        else:
                            self.sentence_scores[sent] += self.word_frequencies[word]
        return self.sentence_scores
    def summary_sentences(self):
        self.find_sentence_scores()
        self.summary_sentences = heapq.nlargest(5, self.sentence_scores, key=self.sentence_scores.get)
        summary = ' '.join(self.summary_sentences)
        return self.summary_sentences
    def find_keyword(self):
        self.get_sentence()
        kw_model = KeyBERT()
        keywords = kw_model.extract_keywords(self.sentence_result)
        return kw_model.extract_keywords(self.sentence_result, keyphrase_ngram_range=(1, 1), stop_words=None)

In [24]:
eng = Eng(["More classified material found at Biden's home", 'Top secret documents reportedly found in Biden cache', 'Four unanswered questions about the Biden documents', 'How Biden and Trump secret files cases compare', 'Delay in telling public about files may haunt Biden', "Republicans want to know who visited Biden's homes", 'The problem with politicians and classified files', 'Biden under investigation', 'How much trouble is the president in over classified documents found at his home?', 'Who is the special counsel in the Biden probe?', 'Special counsel appointed to investigate Biden files', 'Second batch of classified Biden documents found', 'Jill Biden has surgery to remove cancerous skin lesions', "Biden 'surprised' about classified files discovery", 'Biden visits US-Mexico border in trip to Texas', 'Western allies to send fighting vehicles to Ukraine', 'What will change with Republicans controlling the House', "What's actually in the $1.7tn US spending bill?", "Zelensky trip shows US doesn't want peace - Russia", 'Ukraine is alive and kicking, Zelensky tells US'])

In [25]:
eng.get_sentence()


"More classified material found at Biden's home Top secret documents reportedly found in Biden cache Four unanswered questions about the Biden documents How Biden and Trump secret files cases compare Delay in telling public about files may haunt Biden Republicans want to know who visited Biden's homes The problem with politicians and classified files Biden under investigation How much trouble is the president in over classified documents found at his home? Who is the special counsel in the Biden probe? Special counsel appointed to investigate Biden files Second batch of classified Biden documents found Jill Biden has surgery to remove cancerous skin lesions Biden 'surprised' about classified files discovery Biden visits US-Mexico border in trip to Texas Western allies to send fighting vehicles to Ukraine What will change with Republicans controlling the House What's actually in the $1.7tn US spending bill? Zelensky trip shows US doesn't want peace - Russia Ukraine is alive and kicking,

In [26]:
eng.summary_sentences()

["Zelensky trip shows US doesn't want peace - Russia Ukraine is alive and kicking, Zelensky tells US",
 'Who is the special counsel in the Biden probe?']

In [27]:
eng.find_keyword()

[('biden', 0.4392),
 ('documents', 0.3501),
 ('classified', 0.3172),
 ('politicians', 0.2588),
 ('files', 0.2574)]

In [28]:
eng.tokenize()

["More classified material found at Biden's home Top secret documents reportedly found in Biden cache Four unanswered questions about the Biden documents How Biden and Trump secret files cases compare Delay in telling public about files may haunt Biden Republicans want to know who visited Biden's homes The problem with politicians and classified files Biden under investigation How much trouble is the president in over classified documents found at his home?",
 'Who is the special counsel in the Biden probe?',
 "Special counsel appointed to investigate Biden files Second batch of classified Biden documents found Jill Biden has surgery to remove cancerous skin lesions Biden 'surprised' about classified files discovery Biden visits US-Mexico border in trip to Texas Western allies to send fighting vehicles to Ukraine What will change with Republicans controlling the House What's actually in the $1.7tn US spending bill?",
 "Zelensky trip shows US doesn't want peace - Russia Ukraine is alive

In [19]:
eng.find_sentence_scores()

{'German Chancellor Olaf Scholz has decided to send Leopard 2 tanks to Ukraine, and allow other countries to do the same, reports in Germany say.': 24,
 'Leopard 2s are made in Germany and Berlin needs to approve their export.': 4,
 'Germany has been hesitant to send its own - or allow other nations to send theirs - over concerns it could escalate the conflict with Russia.': 23,
 'But now Mr Scholz has decided to send the at least a company of Leopard 2 A6 tanks, several German outlets say.': 24,
 'The news was broken by Der Spiegel in Germany, citing government sources, before being seemingly confirmed elsewhere.': 9,
 'There has been no official statement from the German government yet.': 6,
 'Ukraine sees the tanks as vital for breaking through Russian lines and to beat an anticipated Russian offensive this spring.': 16,
 'Ukraine President Volodymyr Zelensky believes about 300 Leopard tanks would help it defeat Russia.': 12,
 'Allied nations have become frustrated at what they perc

In [20]:
eng.find_word_frequencies()

{'German': 4,
 'Chancellor': 1,
 'Olaf': 1,
 'Scholz': 2,
 'decided': 2,
 'send': 7,
 'Leopard': 5,
 'tanks': 8,
 'Ukraine': 9,
 'allow': 2,
 'countries': 1,
 'reports': 2,
 'Germany': 8,
 'say': 2,
 'made': 1,
 'Berlin': 1,
 'needs': 1,
 'approve': 1,
 'export': 1,
 'hesitant': 1,
 'nations': 2,
 'concerns': 1,
 'could': 1,
 'escalate': 1,
 'conflict': 1,
 'Russia': 2,
 'But': 1,
 'Mr': 1,
 'least': 1,
 'company': 2,
 'A': 3,
 'several': 1,
 'outlets': 1,
 'usually': 1,
 'number': 2,
 'Leopards': 1,
 'Poland': 1,
 'wants': 1,
 'Challenger': 1,
 'UK': 1,
 'already': 1,
 'committed': 1,
 'The': 6,
 'news': 1,
 'broken': 1,
 'Der': 1,
 'Spiegel': 1,
 'citing': 1,
 'government': 2,
 'sources': 1,
 'seemingly': 1,
 'confirmed': 1,
 'elsewhere': 1,
 'There': 1,
 'official': 1,
 'statement': 1,
 'yet': 2,
 'sees': 1,
 'vital': 1,
 'breaking': 1,
 'Russian': 3,
 'lines': 1,
 'beat': 1,
 'anticipated': 1,
 'offensive': 1,
 'spring': 1,
 'President': 1,
 'Volodymyr': 1,
 'Zelensky': 1,
 'believ

In [13]:
from bs4 import BeautifulSoup
import requests

url = "https://www.bbc.com/news/world-europe-64391272"
page = requests.get(url)
soup = BeautifulSoup(page.content, "html.parser")
p_tags = soup.find_all("p")
p_texts = [tag.get_text() for tag in p_tags]
print(p_texts)

['German Chancellor Olaf Scholz has decided to send Leopard 2 tanks to Ukraine, and allow other countries to do the same, reports in Germany say.', 'Leopard 2s are made in Germany and Berlin needs to approve their export.', 'Germany has been hesitant to send its own - or allow other nations to send theirs - over concerns it could escalate the conflict with Russia.', 'But now Mr Scholz has decided to send the at least a company of Leopard 2 A6 tanks, several German outlets say.', 'A company is usually 14 tanks - the same number of Leopards Poland wants to send, and the same number of Challenger 2 tanks the UK has already committed to Ukraine.', 'The news was broken by Der Spiegel in Germany, citing government sources, before being seemingly confirmed elsewhere.', 'There has been no official statement from the German government yet.', 'Ukraine sees the tanks as vital for breaking through Russian lines and to beat an anticipated Russian offensive this spring.', 'Ukraine President Volodymy