In [1]:
from keybert import KeyBERT

In [84]:
class Keyword():
    def __init__(self, language):
        supported_languages = ['it', 'fr']
        if language == 'en':
            self.stopwords = 'english'
            model = 'xlm-r-distilroberta-base-paraphrase-v1'
        elif language in supported_languages:
            with open(f'stopwords_{language}.txt', 'r') as file:
                temp = file.readlines()
                self.stopwords = [x.strip('\n') for x in temp]
        else:
            raise KeyError (f'{language} language is not supported :(')
            
        model = 'distiluse-base-multilingual-cased-v1'    
        self.model = KeyBERT(model)
        
    def extract(self, doc, n_kw=5, min_len=1, max_len=1):
        keywords = self.model.extract_keywords(doc,
                                               stop_words=self.stopwords,
                                               top_n=n_kw,
                                               keyphrase_ngram_range=(min_len, max_len))
        keywords = [x[0] for x in keywords]
        return keywords

In [36]:
doc_en = """
Richard Phillips Feynman ForMemRS (/ˈfaɪnmən/; May 11, 1918 – February 15, 1988) was an American theoretical physicist, known for his work in the path integral formulation of quantum mechanics, the theory of quantum electrodynamics, the physics of the superfluidity of supercooled liquid helium, as well as his work in particle physics for which he proposed the parton model. For contributions to the development of quantum electrodynamics, Feynman received the Nobel Prize in Physics in 1965 jointly with Julian Schwinger and Shin'ichirō Tomonaga.

Feynman developed a widely used pictorial representation scheme for the mathematical expressions describing the behavior of subatomic particles, which later became known as Feynman diagrams. During his lifetime, Feynman became one of the best-known scientists in the world. In a 1999 poll of 130 leading physicists worldwide by the British journal Physics World, he was ranked the seventh greatest physicist of all time.[2]

He assisted in the development of the atomic bomb during World War II and became known to a wide public in the 1980s as a member of the Rogers Commission, the panel that investigated the Space Shuttle Challenger disaster. Along with his work in theoretical physics, Feynman has been credited with pioneering the field of quantum computing and introducing the concept of nanotechnology. He held the Richard C. Tolman professorship in theoretical physics at the California Institute of Technology.
"""

doc_it = """
Il termine "guerra mondiale" fu coniato all'inizio del XX secolo, negli anni antecedenti alla prima guerra mondiale stessa. L'attivazione degli opposti sistemi di alleanze europee, ovvero la Triplice alleanza (composta da Impero tedesco, Impero austro-ungarico e Regno d'Italia) e l'Intesa (formata da Francia, Impero russo e Regno Unito) avrebbe portato a un conflitto globale, a causa dei grandi imperi coloniali posseduti da molte delle potenze d'Europa in tutto il mondo: le colonie, infatti, avrebbero rivestito un importante ruolo strategico. Le stesse considerazioni portarono a comprendere che l'eventuale conflitto non sarebbe rimasto isolato alle sole colonie, ma che piuttosto si sarebbe facilmente potuto espandere ad altri territori, portando a estensioni dei combattimenti inimmaginabili nell'era pre-coloniale.

Anche se il primo conflitto del 1914-1918 venne definito "mondiale" già pochi giorni dopo il suo scoppio, sembra che il termine "Prima" non sia stato anteposto fino agli inizi degli anni venti. Il conflitto rimase infatti assai più conosciuto come Grande Guerra ("Guerra del '15-'18" in Italia, che vi partecipò dal 24 maggio 1915) fino a quando lo scoppio della seconda guerra mondiale rese necessaria la distinzione."""

In [85]:
kw_obj = Keyword('it')
kw_obj.extract(doc_it)

['guerra', '1914', 'alleanza', 'conflitto', '1915']

In [65]:
kw_obj = Keyword('en')
kw_obj.extract(doc_en)

['electrodynamics', 'superfluidity', 'feynman', 'supercooled', 'schwinger']