In [7]:
import operator
from keyphrase_vectorizers import KeyphraseCountVectorizer
from keybert import KeyBERT
from bertopic import BERTopic

# Shared KeyBERT instance, built once with the library defaults and reused by
# extract_keywords_with_KeyBERT for every call.
kw_model = KeyBERT()

# Shared BERTopic instance, built with the library defaults.
# topic_modeling_with_BERT calls fit_transform on this same object, so each
# call fits this one model again rather than creating a new one.
topic_model = BERTopic()

def extract_keywords_with_KeyBERT(text):
    """
    Rank the keyphrases KeyBERT finds in a single piece of text.

    The text is handed to the module-level ``kw_model`` as a one-document
    corpus, with a freshly constructed ``KeyphraseCountVectorizer`` supplying
    the candidate phrases.

    Args:
        text (str): The document to extract keyphrases from.

    Returns:
        dict: Keyphrase -> score, with entries ordered from the highest score
        to the lowest. If anything raises during extraction, the error is
        printed and the placeholder ``{"Null": 0.5}`` is returned instead.
    """
    try:
        # KeyBERT is given a list holding just this one document
        scored_phrases = kw_model.extract_keywords(
            docs=[text],
            vectorizer=KeyphraseCountVectorizer(),
        )

        # Collect the (phrase, score) pairs into a mapping
        score_by_phrase = {}
        for phrase, weight in scored_phrases:
            score_by_phrase[phrase] = weight

        # Highest-scoring phrases first; dict preserves that insertion order
        ranked_pairs = sorted(
            score_by_phrase.items(),
            key=operator.itemgetter(1),
            reverse=True,
        )
        return dict(ranked_pairs)
    except Exception as e:
        # Best-effort: report the problem and hand back a placeholder result
        print(f"Keyword extraction error: {str(e)}")
        return {"Null": 0.5}

def topic_modeling_with_BERT(text):
    """
    Fit the module-level BERTopic model on one or more documents.

    BERTopic discovers topics across a *collection* of documents. Passing a
    single string is still accepted for backward compatibility, but it yields
    a one-document corpus, which BERTopic rejects ("Transform unavailable when
    model was fit with only a single data sample."). Pass a list of documents
    to get usable results.

    Args:
        text (str | Iterable[str]): Either a single document, or an iterable
            of documents (for example a list of strings). A bare string is
            treated as exactly one document, never as an iterable of
            characters.

    Returns:
        tuple: ``(topics, probabilities)`` exactly as returned by
        ``topic_model.fit_transform``. If fitting raises for any reason, the
        error is printed and ``([], [])`` is returned instead.
    """
    try:
        # A str is one document; anything else is materialised as the corpus.
        # This stays inside the try so a non-iterable argument is reported
        # through the same error path as a failed fit.
        docs = [text] if isinstance(text, str) else list(text)

        # Fit the shared BERTopic model on the corpus (this refits it on
        # every call, replacing whatever it learned previously)
        topics, probabilities = topic_model.fit_transform(docs)

        return topics, probabilities
    except Exception as e:
        # Best-effort: report the problem and return empty results
        print(f"Topic modeling error: {str(e)}")
        return [], []

# --- Demo 1: keyword extraction on a short description of NLP ---
text_for_keywords = """
Natural language processing (NLP) is a field of artificial intelligence
that focuses on the interaction between computers and humans through
natural language. NLP techniques enable computers to understand,
interpret, and generate human language in a valuable way.
"""

# Returns a dict of keyphrase -> score, highest score first. On failure the
# function returns the placeholder {"Null": 0.5}, which the loop below would
# print as if it were a real keyword.
keywords = extract_keywords_with_KeyBERT(text_for_keywords)

# Print the extracted keywords and their scores, one per line
print("Keyword Extraction Results:")
for keyword, score in keywords.items():
    print(f"Keyword: {keyword}, Score: {score}")

# --- Demo 2: topic modeling ---
text_for_topic_modeling = """
Topic modeling is a technique in natural language processing that
aims to discover the hidden topics within a collection of documents.
These topics can provide valuable insights into the main themes of the content.
"""

# NOTE(review): this passes a single document. In the recorded run of this
# cell BERTopic rejected that ("Transform unavailable when model was fit with
# only a single data sample."), so the function printed the error and
# returned ([], []), and the loop below printed nothing.
topics, probabilities = topic_modeling_with_BERT(text_for_topic_modeling)

# Print each topic id next to its probability.
# The :.3f format assumes every probability is a scalar float - TODO confirm
# against the BERTopic configuration in use.
print("\nTopic Modeling Results:")
for topic, prob in zip(topics, probabilities):
    print(f"Topic: {topic}, Probability: {prob:.3f}")


Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

2023-10-19 08:33:29,123 - KeyphraseVectorizer - INFO - It looks like you do not have downloaded a list of stopwords yet. It is attempted to download the stopwords now.
INFO:KeyphraseVectorizer:It looks like you do not have downloaded a list of stopwords yet. It is attempted to download the stopwords now.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Keyword Extraction Results:
Keyword: nlp, Score: 0.7081
Keyword: nlp techniques, Score: 0.7062
Keyword: natural language processing, Score: 0.6793
Keyword: natural language, Score: 0.6299
Keyword: human language, Score: 0.4933
Topic modeling error: Transform unavailable when model was fit with only a single data sample.

Topic Modeling Results:


In [6]:
# Pinned to the version the recorded install log for this cell resolved to.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install bertopic==0.15.0

Collecting bertopic
  Downloading bertopic-0.15.0-py2.py3-none-any.whl (143 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.4/143.4 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting hdbscan>=0.8.29 (from bertopic)
  Downloading hdbscan-0.8.33.tar.gz (5.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m44.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting umap-learn>=0.5.0 (from bertopic)
  Downloading umap-learn-0.5.4.tar.gz (90 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.8/90.8 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting cython<3,>=0.27 (from hdbscan>=0.8.29->bertopic)
  Using cached Cython-0.29.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_6

In [2]:
# Pinned to the version the recorded install log for this cell resolved to.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install keyphrase-vectorizers==0.0.11


Collecting keyphrase-vectorizers
  Downloading keyphrase_vectorizers-0.0.11-py3-none-any.whl (29 kB)
Collecting spacy-transformers>=1.1.6 (from keyphrase-vectorizers)
  Downloading spacy_transformers-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (197 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m197.8/197.8 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting transformers<4.35.0,>=3.4.0 (from spacy-transformers>=1.1.6->keyphrase-vectorizers)
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m48.8 MB/s[0m eta [36m0:00:00[0m
Collecting spacy-alignments<1.0.0,>=0.7.2 (from spacy-transformers>=1.1.6->keyphrase-vectorizers)
  Downloading spacy_alignments-0.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (313 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.0/314.0 kB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
Coll

In [4]:
# Pinned to the version the recorded install log for this cell resolved to.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install keybert==0.8.3


Collecting keybert
  Downloading keybert-0.8.3.tar.gz (29 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sentence-transformers>=0.3.8 (from keybert)
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sentencepiece (from sentence-transformers>=0.3.8->keybert)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: keybert, sentence-transformers
  Building wheel for keybert (setup.py) ... [?25l[?25hdone
  Created wheel for keybert: filename=keybert-0.8.3-py3-none-any.whl size=39124 sha256=89fb3f9951deaf757c2e23fae6f8cb9db37002b0df18bc16b5f0aadcfebc278b
  Stored in direct