In [None]:
import re

import numpy as np
from rank_bm25 import BM25Okapi
from sentence_transformers import SentenceTransformer, util

# Job posting that every CV is scored against. Written as explicit pieces so
# the leading/trailing newlines and the space after "PostgreSQL." stay visible.
job_description = (
    "\n"
    "We are looking for a Python developer with experience in Django, REST APIs, and PostgreSQL. \n"
    "Knowledge of Docker and cloud platforms is a plus.\n"
)

# Candidate CVs. A CV's position in this list is the number printed as
# "CV #<index>" in the final results.
cv_texts = [
    (
        "Experienced backend engineer skilled in Python, Flask, and SQL databases. "
        "Worked with REST APIs and Docker."
    ),
    (
        "Frontend developer with expertise in React, JavaScript, and UI/UX design. "
        "No backend experience."
    ),
    (
        "Full-stack developer proficient in Python, Django, RESTful services, "
        "PostgreSQL, and cloud deployment on AWS."
    ),
    (
        "Data scientist with knowledge of Python, Pandas, machine learning, "
        "and data visualization tools."
    ),
    (
        "DevOps engineer experienced with Docker, Kubernetes, CI/CD pipelines, "
        "and cloud infrastructure."
    ),
]

# -----------------------------
# STEP 1: BM25 Filtering
# -----------------------------
def _tokenize(text):
    """Lowercase *text* and split it into word tokens without punctuation.

    A plain ``str.split`` keeps punctuation glued to words, so "Docker."
    and "Docker" (or "PostgreSQL." and "PostgreSQL,") become different
    tokens and BM25 never sees them as a match. Extracting ``\\w+`` runs
    makes the same word compare equal regardless of surrounding punctuation.

    NOTE: names made of symbols are reduced to their word characters
    (e.g. "C++" -> "c", "CI/CD" -> "ci", "cd").
    """
    return re.findall(r"\w+", text.lower())


# Build the BM25 index over the CVs; the query must go through the same
# tokenizer as the corpus so that tokens are comparable.
tokenized_cvs = [_tokenize(cv) for cv in cv_texts]
bm25 = BM25Okapi(tokenized_cvs)

job_tokens = _tokenize(job_description)
bm25_scores = bm25.get_scores(job_tokens)

# Keep the top_n highest-scoring CVs as the shortlist for semantic reranking.
# argsort is ascending, so reverse it before slicing; if there are fewer than
# top_n CVs the slice simply returns all of them.
top_n = 3
top_indices = np.argsort(bm25_scores)[::-1][:top_n]
top_cv_texts = [cv_texts[i] for i in top_indices]

# -----------------------------
# STEP 2: SBERT Semantic Scoring
# -----------------------------
# Sentence-embedding model used to compare the job text with each CV
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed the BM25 shortlist and the job description as tensors
cv_embeddings = model.encode(top_cv_texts, convert_to_tensor=True)
job_embedding = model.encode(job_description, convert_to_tensor=True)

# Compare the single job embedding against every shortlisted CV; row 0 of the
# result is read as the job-vs-CV scores, moved to CPU and turned into NumPy.
similarity_matrix = util.cos_sim(job_embedding, cv_embeddings)
cosine_scores = similarity_matrix[0].cpu().numpy()

# -----------------------------
# STEP 3: Rerank by Semantic Score
# -----------------------------
# Bundle each shortlisted CV as (index into cv_texts, text, semantic score),
# then order the bundles from most to least similar to the job description.
# list.sort is stable, so equal scores keep their BM25 order.
reranked = list(zip(top_indices, top_cv_texts, cosine_scores))
reranked.sort(key=lambda candidate: candidate[2], reverse=True)

# Print the reranked shortlist, one CV per separator-delimited entry
print("🔍 Top CVs after BM25 filtering + SBERT reranking:\n")
for idx, text, score in reranked:
    print(f"CV #{idx} | Semantic Score: {score:.4f}\n{text}\n{'-'*60}")


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


🔍 Top CVs after BM25 filtering + SBERT reranking:

CV #2 | Semantic Score: 0.7536
Full-stack developer proficient in Python, Django, RESTful services, PostgreSQL, and cloud deployment on AWS.
------------------------------------------------------------
CV #0 | Semantic Score: 0.7357
Experienced backend engineer skilled in Python, Flask, and SQL databases. Worked with REST APIs and Docker.
------------------------------------------------------------
CV #3 | Semantic Score: 0.4527
Data scientist with knowledge of Python, Pandas, machine learning, and data visualization tools.
------------------------------------------------------------
