In [1]:
# One-time install hints (run in the kernel's environment if a package is missing):
# %pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_lg-0.4.0.tar.gz
# %pip install summa
# %pip install keybert

# Imports are grouped ahead of any model construction so that a missing
# dependency fails immediately instead of after a model has been loaded.
import spacy
import yake
from keybert import KeyBERT
from rake_nltk import Rake
from summa import keywords as Skeywords

# spaCy pipeline backed by the scispaCy "en_core_sci_lg" model.
nlp = spacy.load("en_core_sci_lg")

# YAKE: a default-configured extractor plus a tuned one.
# NOTE(review): kw_extractor is not used by any cell visible here - confirm before removing.
kw_extractor = yake.KeywordExtractor()
language = "en"
max_ngram_size = 3              # longest keyphrase, in words (passed as n)
deduplication_threshold = 0.9   # passed as dedupLim
numOfKeywords = 20              # passed as top: keywords returned per text
custom_kw_extractor = yake.KeywordExtractor(
    lan=language,
    n=max_ngram_size,
    dedupLim=deduplication_threshold,
    top=numOfKeywords,
    features=None,
)

# RAKE (rake_nltk) with its default settings.
rake_nltk_var = Rake()

# KeyBERT using the "all-mpnet-base-v2" embedding model.
kw_model = KeyBERT(model='all-mpnet-base-v2')

In [2]:
# Sample education-requirement statements; every extractor cell below iterates over this list.
# Note: the first and third entries are the same text, so their extracted keywords are identical.
texts = [ 
          "Bachelor's Degree from an accredited college or university with a major in Computer Science, Information Systems, Engineering, Business, or other related scientific or technical discipline or three (3) years of equivalent experience in a related field.  A Master's Degree is preferred.",
          "Bachelor's Degree from an accredited college or university in Engineering, Computer Science, Information Systems, Business or other related discipline. Master's degree or project management certification is preferred.",
          "Bachelor's Degree from an accredited college or university with a major in Computer Science, Information Systems, Engineering, Business, or other related scientific or technical discipline or three (3) years of equivalent experience in a related field.  A Master's Degree is preferred.",
          "Bachelor's Degree from an accredited college or university in Engineering, Computer Science, Information Systems, Business, or other related Discipline.  Master's degree preferred.",
          "Bachelor's degree from an accredited college or university in Engineering, Computer Science, Information Systems, Business, or other related discipline.",
          "Bachelor's Degree from an accredited college or university in Engineering, Computer Science, Information Systems, Public Health, or other related discipline. Master's degree and/or project management certification is preferred. PMI-ACP, CSM or equivalent certification.",
          "Bachelor's Degree from an accredited college or university in Business, Human Resources Management or a related field. An MBA or MPA is preferred.",
          "Bachelor's degree or foreign equivalent required from an accredited institution. Will also consider three years of progressive experience in the specialty in lieu of every year of education.",
          "Bachelor's Degree from an accredited college or university in Engineering, Computer Science, Information Systems, Public Health, or other related discipline. Master's degree and/or project management certification is preferred. PMP certification. PMI-ACP, CSM or equivalent certification."
          ]

Using spaCy NLP

In [3]:
# Run every statement through the spaCy pipeline and print the entities it finds.
for statement in texts:
    entities = nlp(statement).ents
    # end="\n\n\n" terminates the line and leaves two blank lines between queries.
    print("Query'Keywords: ", entities, end="\n\n\n")

Query'Keywords:  (Bachelor's, Degree, accredited, college, university, Computer Science, Information Systems, Engineering, Business, scientific, technical discipline, years, equivalent, experience, Master's, Degree)


Query'Keywords:  (Bachelor's, Degree, accredited, college, university, Engineering, Computer Science, Information Systems, Business, discipline, Master's, degree, project, management, certification)


Query'Keywords:  (Bachelor's, Degree, accredited, college, university, Computer Science, Information Systems, Engineering, Business, scientific, technical discipline, years, equivalent, experience, Master's, Degree)


Query'Keywords:  (Bachelor's, Degree, accredited, college, university, Engineering, Computer Science, Information Systems, Business, Discipline, Master's, degree)


Query'Keywords:  (Bachelor's, degree, accredited, college, university, Engineering, Computer Science, Information Systems, Business, discipline)


Query'Keywords:  (Bachelor's, Degree, accredited, c

Using YAKE NLP

In [4]:
# Yet Another Keyword Extractor (YAKE) selects the most important keywords of a text using statistical features 
# computed from the text itself. YAKE lets you control the maximum n-gram size of the extracted keywords, how many
# keywords are returned, and other settings.

In [5]:
# Extract keywords from every statement with the tuned YAKE extractor and
# print one (phrase, score) tuple per line; YAKE ranks lower scores as more relevant.
for statement in texts:
    scored_phrases = custom_kw_extractor.extract_keywords(statement)
    print("Query'Keywords: ")
    for scored_phrase in scored_phrases:
        print(scored_phrase)
    print('\n')

Query'Keywords: 
('Information Systems', 0.0031498810108932717)
('Computer Science', 0.004053542477137147)
('major in Computer', 0.010722405245351528)
('years of equivalent', 0.016018634870496127)
('Bachelor Degree', 0.017178721139731297)
('related field', 0.01761336752910954)
('accredited college', 0.02216221073515605)
('college or university', 0.02216221073515605)
('scientific or technical', 0.02216221073515605)
('technical discipline', 0.02216221073515605)
('equivalent experience', 0.02216221073515605)
('related scientific', 0.024383244337604453)
('Master Degree', 0.03466016019794486)
('Engineering', 0.04054602715245258)
('Business', 0.04054602715245258)
('Science', 0.056035617376018844)
('Information', 0.056035617376018844)
('Systems', 0.056035617376018844)
('Computer', 0.0720466338245492)
('Degree', 0.0788653742669483)


Query'Keywords: 
('Computer Science', 0.0036329479065606626)
('Information Systems', 0.0036329479065606626)
('university in Engineering', 0.010313650872216652)
('

Using Rake_nltk NLP

In [6]:
# The rake_nltk package implements the Rapid Automatic Keyword Extraction (RAKE) algorithm on top of the NLTK toolkit,
# which together give a simple but powerful keyword extraction method.

In [7]:
# Feed each statement to RAKE, then print the phrases it ranked for that statement.
for statement in texts:
    rake_nltk_var.extract_keywords_from_text(statement)
    ranked_phrases = rake_nltk_var.get_ranked_phrases()
    # end="\n\n\n" terminates the line and leaves two blank lines between queries.
    print("Query'Keywords: ", ranked_phrases, end="\n\n\n")

Query'Keywords:  ['technical discipline', 'related scientific', 'related field', 'information systems', 'equivalent experience', 'computer science', 'accredited college', 'years', 'university', 'three', 'preferred', 'master', 'major', 'engineering', 'degree', 'degree', 'business', 'bachelor', '3']


Query'Keywords:  ['project management certification', 'related discipline', 'information systems', 'computer science', 'accredited college', 'university', 'preferred', 'master', 'engineering', 'degree', 'degree', 'business', 'bachelor']


Query'Keywords:  ['technical discipline', 'related scientific', 'related field', 'information systems', 'equivalent experience', 'computer science', 'accredited college', 'years', 'university', 'three', 'preferred', 'master', 'major', 'engineering', 'degree', 'degree', 'business', 'bachelor', '3']


Query'Keywords:  ['related discipline', 'information systems', 'computer science', 'accredited college', 'degree preferred', 'degree', 'university', 'master', 

Using TextRank NLP

In [8]:
# TextRank is an unsupervised method for extracting keywords and sentences. It builds a graph in which each node is a word 
# and an edge links two words that co-occur within a sliding window of a predetermined size; the words are then ranked 
# by their importance in that graph.

In [9]:
# Score each statement's keywords with summa's TextRank and print at most the first ten.
for statement in texts:
    scored_keywords = Skeywords.keywords(statement, scores=True)
    first_ten = scored_keywords[:10]
    # end="\n\n\n" terminates the line and leaves two blank lines between queries.
    print("Query'Keywords: ", first_ten, end="\n\n\n")

Query'Keywords:  [('accredited college', 0.7062362183032247)]


Query'Keywords:  [('accredited college', 0.7060788754021493)]


Query'Keywords:  [('accredited college', 0.7062362183032247)]


Query'Keywords:  [('preferred', 0.617594496358518), ('degree', 0.6175944963585178)]


Query'Keywords:  [('college', 0.7070828199096467)]


Query'Keywords:  [('accredited college', 0.7070697689251593), ('information', 0.005221330786619044)]


Query'Keywords:  [('college', 0.7070828199096467)]


Query'Keywords:  [('equivalent', 0.8164676848469393)]


Query'Keywords:  [('accredited college', 0.7070441503327372), ('discipline', 0.0053793232687609025)]




Using KeyBERT NLP

In [10]:
# KeyBERT is a simple, easy-to-use keyword extraction algorithm that takes advantage of SBERT embeddings to find the 
# keywords and key phrases in a document that are most similar to the document itself.

In [11]:
# Ask KeyBERT for the ten best 1- to 3-word keyphrases of each statement and print only the phrases.
for statement in texts:
    scored_phrases = kw_model.extract_keywords(
        statement,
        keyphrase_ngram_range=(1, 3),
        stop_words='english',
        highlight=False,
        top_n=10,
    )
    # dict() keeps first-seen order and collapses repeated phrases; iterating it yields the phrases.
    phrases = list(dict(scored_phrases))
    # end="\n\n\n" terminates the line and leaves two blank lines between queries.
    print("Query'Keywords: ", phrases, end="\n\n\n")


Query'Keywords:  ['major computer science', 'master degree preferred', 'university major computer', 'degree preferred', 'information systems engineering', 'computer science', 'computer science information', 'bachelor degree accredited', 'bachelor degree', 'systems engineering business']


Query'Keywords:  ['project management certification', 'degree project management', 'management certification preferred', 'management certification', 'engineering computer science', 'certification preferred', 'systems business related', 'master degree', 'university engineering', 'project management']


Query'Keywords:  ['major computer science', 'master degree preferred', 'university major computer', 'degree preferred', 'information systems engineering', 'computer science', 'computer science information', 'bachelor degree accredited', 'bachelor degree', 'systems engineering business']


Query'Keywords:  ['master degree preferred', 'degree preferred', 'engineering computer science', 'bachelor degree', '