In [23]:
from sentence_transformers import SentenceTransformer
from keybert import KeyBERT
import time

# different sections in a job description
# Each section is stored in its own string so that keywords can be extracted
# from it separately by the cells below.
# `overall`: the introductory summary of the role. The triple-quoted literal
# starts and ends with a newline, which is part of the string.
overall = """
We are looking for a passionate, "somewhat technical" recent graduate who lives and breathes Artificial Intelligence. Do you check Product Hunt daily for the latest AI tools? Did you spend your senior year figuring out how to integrate ChatGPT into your workflows?
As our AI Innovation Intern, you will bridge the gap between abstract AI concepts and real-world business solutions. You won’t just be researching; you will be building prototypes, testing new models, and automating internal processes. This is a hands-on role for someone who knows Python but prefers building practical tools over writing academic papers.
"""

# `key_respon`: the key-responsibilities section of the job description,
# written as one "Title: description" item per line.
key_respon = """
Rapid Prototyping: Build quick Proof of Concepts (PoCs) using Python, Streamlit, or no-code platforms to demonstrate how AI can solve specific business problems.
Prompt Engineering & Evaluation: Design, test, and optimize system prompts for LLMs (OpenAI, Anthropic, Gemini, Llama) to ensure consistent and high-quality outputs.
Tool Scouting: Act as our internal "AI Scout." Keep up with the latest developments (Agents, RAG, Multimodal models) and recommend tools that can improve our stack.
Workflow Automation: Use tools like Zapier/Make or write Python scripts to connect AI models with our existing software (Slack, Email, CRM).
Knowledge Base Management: Assist in preparing and cleaning data for RAG (Retrieval-Augmented Generation) systems so our AI actually knows about our business.
"""

# `required`: the required-skills section of the job description,
# written as one "Title: description" item per line.
required = """
Recent Graduate: Bachelor’s degree in Computer Science, Data Science, Information Systems, or a related technical field.
Python Proficiency: You are comfortable writing scripts to manipulate data and call APIs. You know your way around libraries like pandas, requests, and standard JSON handling.
API Experience: You have generated an API key (OpenAI, Hugging Face, etc.) and successfully made calls to an endpoint.
GenAI Literacy: You understand the difference between Zero-shot and Few-shot prompting, and you know what "Context Window" and "Temperature" mean.
"""

# `preferred`: the preferred-skills section of the job description,
# written as one short sentence per line (no "Title:" prefix, unlike the
# responsibilities and required-skills sections).
preferred = """
Experience with LangChain or LlamaIndex.
Familiarity with Vector Databases (Pinecone, Weaviate, or ChromaDB).
Experience building basic front-ends using Streamlit or Gradio.
Knowledge of automation tools like Make.com or Zapier.
"""

In [24]:
# Load the default all-MiniLM-L6-v2 model (384 dimensional dense vector space)
# and hand it to KeyBERT as its embedding model.
embedded_model = SentenceTransformer('all-MiniLM-L6-v2')
miniLM_model = KeyBERT(model=embedded_model)

# Benchmark settings, named once so the loop count and the divisor used for
# the average cannot drift apart.
N_RUNS = 10            # number of timed passes to average over
NGRAM_RANGE = (1, 2)   # passed as keyphrase_ngram_range to every call

# Time N_RUNS full passes over the four job-description sections.
# perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring short durations, whereas
# time.time() follows the wall clock and can jump if that clock is adjusted.
# Each pass overwrites the keywords_* variables, so only the results of the
# last pass are printed below.
start = time.perf_counter()
for _ in range(N_RUNS):
    keywords_overall = miniLM_model.extract_keywords(overall, keyphrase_ngram_range=NGRAM_RANGE)
    keywords_respon = miniLM_model.extract_keywords(key_respon, keyphrase_ngram_range=NGRAM_RANGE)
    keywords_required = miniLM_model.extract_keywords(required, keyphrase_ngram_range=NGRAM_RANGE)
    keywords_preferred = miniLM_model.extract_keywords(preferred, keyphrase_ngram_range=NGRAM_RANGE)
end = time.perf_counter()

# Elapsed seconds divided by the number of passes: one pass covers all four sections.
print("Average time for KeyBERT with all-MiniLM-L6-v2:", (end - start) / N_RUNS)
print()

print("Overall Keywords:", keywords_overall)
print()
print("Key Responsibilities Keywords:", keywords_respon)
print()
print("Required Skills Keywords:", keywords_required)
print()
print("Preferred Skills Keywords:", keywords_preferred)

Average time for KeyBERT with all-MiniLM-L6-v2: 0.21077537536621094

Overall Keywords: [('ai innovation', 0.5662), ('ai tools', 0.5603), ('innovation intern', 0.5375), ('workflows ai', 0.5349), ('latest ai', 0.5231)]

Key Responsibilities Keywords: [('workflow automation', 0.5472), ('automation', 0.5348), ('automation use', 0.5051), ('prototyping', 0.5002), ('rapid prototyping', 0.4818)]

Required Skills Keywords: [('python proficiency', 0.6676), ('python', 0.4567), ('api experience', 0.4565), ('proficiency', 0.4449), ('genai literacy', 0.4438)]

Preferred Skills Keywords: [('vector databases', 0.4651), ('langchain llamaindex', 0.4381), ('experience building', 0.436), ('databases pinecone', 0.4337), ('tools', 0.4217)]


In [25]:
# Setup note: install once in the kernel environment if hf_xet is missing: %pip install hf_xet

In [26]:
# Load the all-mpnet-base-v2 model (768 dimensional dense vector space)
# and hand it to KeyBERT as its embedding model.
# NOTE: `embedded_model` and the keywords_* variables are also assigned by the
# all-MiniLM-L6-v2 cell; running this cell replaces those values.
embedded_model = SentenceTransformer('all-mpnet-base-v2')
mpnet_model = KeyBERT(model=embedded_model)

# Benchmark settings, named once so the loop count and the divisor used for
# the average cannot drift apart.
N_RUNS = 10            # number of timed passes to average over
NGRAM_RANGE = (1, 2)   # passed as keyphrase_ngram_range to every call

# Time N_RUNS full passes over the four job-description sections.
# perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring short durations, whereas
# time.time() follows the wall clock and can jump if that clock is adjusted.
# Each pass overwrites the keywords_* variables, so only the results of the
# last pass are printed below.
start = time.perf_counter()
for _ in range(N_RUNS):
    keywords_overall = mpnet_model.extract_keywords(overall, keyphrase_ngram_range=NGRAM_RANGE)
    keywords_respon = mpnet_model.extract_keywords(key_respon, keyphrase_ngram_range=NGRAM_RANGE)
    keywords_required = mpnet_model.extract_keywords(required, keyphrase_ngram_range=NGRAM_RANGE)
    keywords_preferred = mpnet_model.extract_keywords(preferred, keyphrase_ngram_range=NGRAM_RANGE)
end = time.perf_counter()

# Elapsed seconds divided by the number of passes: one pass covers all four sections.
print("Average time for KeyBERT with all-mpnet-base-v2:", (end - start) / N_RUNS)
print()

print("Overall Keywords:", keywords_overall)
print()
print("Key Responsibilities Keywords:", keywords_respon)
print()
print("Required Skills Keywords:", keywords_required)
print()
print("Preferred Skills Keywords:", keywords_preferred)

Average time for KeyBERT with all-mpnet-base-v2: 0.5121243476867676

Overall Keywords: [('ai innovation', 0.6088), ('innovation intern', 0.6075), ('ai', 0.57), ('ai concepts', 0.5245), ('latest ai', 0.5211)]

Key Responsibilities Keywords: [('demonstrate ai', 0.5752), ('ai models', 0.5301), ('internal ai', 0.4987), ('ai', 0.4972), ('systems ai', 0.4794)]

Required Skills Keywords: [('api experience', 0.4628), ('openai', 0.4597), ('python proficiency', 0.4221), ('apis', 0.4176), ('key openai', 0.3813)]

Preferred Skills Keywords: [('tools like', 0.393), ('automation tools', 0.3786), ('building basic', 0.37), ('knowledge automation', 0.3243), ('experience building', 0.2926)]
