KeyBERT is a minimal and easy-to-use keyword extraction technique 
that leverages BERT embeddings to create keywords and keyphrases that are most similar to a document.

In [1]:
from pymongo import MongoClient 
from keybert import KeyBERT

In [2]:
# Connect to the MongoDB server on the loopback address and select the
# 'calais' collection of the 'FACTIVA' database.
client = MongoClient(host='127.0.0.1', port=27017)
calais_collection = client['FACTIVA']['calais']

In [3]:
# Fetch at most 10 documents and keep only those with more than one key.
docs = []
for doc in calais_collection.find({}).limit(10):
    if len(doc) <= 1:
        continue
    docs.append(doc)
len(docs)

9

In [4]:
# Position of the document to inspect; used below to index `texts` and the
# per-document tag lists.
selected_indice = 2

In [5]:
# Pull the 'text' field out of every retained document.
texts = [entry['text'] for entry in docs]

In [6]:
# Text of the selected document.
# NOTE(review): `doc` is reassigned to a hard-coded article in the next cell,
# so this value is not the one passed to the extractor further down.
doc = texts[selected_indice]
# Keyword extractor backed by the named embedding model.
model = KeyBERT(model='distilbert-base-nli-mean-tokens')


In [7]:
# Hard-coded sample article; this replaces any value previously bound to `doc`.
doc = """ 
Washington (CNN)President Joe Biden on Monday will reinstate the Covid-19 travel restrictions on non-US citizens who have been in Brazil, Ireland, the United Kingdom, and much of Europe, a White House official confirmed to CNN.
Biden will also extend the restrictions to travelers who have recently been to South Africa, the official said.
The step, which was first reported by Reuters, comes just one week after President Donald Trump signed an executive order in his final days in office lifting the restrictions on travelers from these countries effective January 26.

"I agree with the Secretary that this action is the best way to continue protecting Americans from COVID-19 while enabling travel to resume safely," Trump wrote in the order, referring to then-Secretary of Health and Human Services Alex Azar.
The Biden transition team, however, vowed that same night the new administration would not lift the restrictions. "With the pandemic worsening, and more contagious variants emerging around the world, this is not the time to be lifting restrictions on international travel," then-incoming White House press secretary Jen Psaki said on Twitter.
"On the advice of our medical team, the Administration does not intend to lift these restrictions on 1/26. In fact, we plan to strengthen public health measures around international travel in order to further mitigate the spread of COVID-19."
The decision to reinstate the travel restrictions -- and expand restrictions in the case of South Africa -- marks the latest effort by the Biden administration to break from Trump's discursive approach to the pandemic as cases continue to climb nationwide.
Biden said on his first full day in office on Thursday his strategy would be "based on science, not politics" as he signed a slate of coronavirus-related executive actions, including ramping up vaccination supplies and requiring international travelers to provide proof of a negative Covid-19 test prior to traveling to the US.
Many of the countries that would have been impacted by Trump's order have their own recent requirements for American travelers looking to enter their borders.

US travelers must have a negative Covid-19 test from within 72 hours prior to travel into the United Kingdom or Ireland, and in conjunction with proof of a completed Declaration of Traveler's Health to enter Brazil. American travelers generally cannot enter countries such as Spain, Germany, France, Italy and Sweden without meeting specific requirements.
This story has been updated to include additional information.
"""

In [8]:
# Ask the extractor for the top 10 keyphrases of one or two words in `doc`.
keywords = model.extract_keywords(
    doc,
    keyphrase_ngram_range=(1, 2),
    top_n=10,
)

In [9]:
# Display the keyphrases bound to `keywords`.
keywords

['monday reinstate',
 'cnn president',
 'donald trump',
 'office thursday',
 'cnn biden',
 'trump signed',
 'president donald',
 'biden monday',
 'week president',
 'thursday strategy']

In [10]:
# Second extraction on the same text: explicit 'english' stop-word list,
# MMR enabled with diversity=0.4, and the top 20 keyphrases requested.
model.extract_keywords(
    doc,
    top_n=20,
    keyphrase_ngram_range=(1, 2),
    stop_words='english',
    use_mmr=True,
    diversity=0.4,
)

['monday reinstate',
 'donald trump',
 'cnn president',
 'office thursday',
 'ramping vaccination',
 'week president',
 'break trump',
 'twitter advice',
 'pandemic worsening',
 'science politics',
 'slate coronavirus',
 'test 72',
 'politics signed',
 'white house',
 'covid 19',
 'new administration',
 'president joe',
 'reinstate covid',
 'effective january',
 'italy sweden']

In [11]:
# Names of the 'socialTag' entries attached to the selected document.
# Only docs[selected_indice] is read, so an unrelated document that lacks
# 'socialTag' cannot raise KeyError here, and `tags` is bound once to its
# final list-of-names value.
# NOTE(review): these tags describe docs[selected_indice], whereas the
# keywords above were extracted from the hard-coded article bound to `doc`.
tags = [tag['name'] for tag in docs[selected_indice]['socialTag']]
tags

['Oil companies',
 'Companies',
 'Industries',
 'Caltex',
 'Texaco',
 'BP',
 'Oil refineries']