In [13]:
import faiss
import numpy as np
import json
from sentence_transformers import SentenceTransformer

In [14]:
# Load the prebuilt FAISS indexes together with their JSON id mappings.
# The JSON files are opened as UTF-8 explicitly: the default text encoding of
# open() depends on the platform/locale, while JSON is UTF-8 by specification.

# Jobs: vectors to search against, plus a mapping that the notebook looks up
# with the stringified FAISS position (jobs_mapping[str(idx)]).
jobs_index = faiss.read_index("./data/jobs_index.faiss")
with open("./data/jobs_index_mapping.json", "r", encoding="utf-8") as f:
    jobs_mapping = json.load(f)

# Resumes: stored CV vectors, plus a mapping that the notebook indexes by
# integer position (resume_mapping[cv_idx]).
resume_index = faiss.read_index("./data/resume_index.faiss")
with open("./data/resume_index_mapping.json", "r", encoding="utf-8") as f:
    resume_mapping = json.load(f)

In [15]:
# Sentence-embedding model used by find_top_jobs_for_cv to encode CV text.
# NOTE(review): presumably the same model that produced the vectors stored in
# the FAISS indexes — confirm, otherwise the similarity scores are meaningless.
model = SentenceTransformer("all-MiniLM-L6-v2")

def find_top_jobs_for_cv(cv_text, top_n=5, *, index=None, mapping=None, encoder=None):
    """
    Return the jobs whose indexed vectors best match the embedding of a CV.

    Parameters
    ----------
    cv_text : str
        Text of the CV to search with.
    top_n : int
        Maximum number of jobs to return.
    index : optional
        Object exposing ``search(queries, k) -> (scores, labels)``.
        Defaults to the notebook-level ``jobs_index``.
    mapping : optional
        Dict from stringified index position to job id.
        Defaults to the notebook-level ``jobs_mapping``.
    encoder : optional
        Object exposing ``encode(list_of_texts, convert_to_numpy=True)``.
        Defaults to the notebook-level ``model``.

    Returns
    -------
    list of dict
        ``{"job_id": ..., "score": float}`` entries, in the order returned by
        the index. The list can be shorter than ``top_n`` when the index holds
        fewer vectors, and is empty when ``top_n`` is not positive.

    Notes
    -----
    The query is L2-normalized before searching. The scores are cosine
    similarities only if the index is an inner-product index built from
    normalized vectors — TODO confirm against the index-building code.
    """
    if top_n <= 0:
        return []

    # Resolve the notebook-level defaults lazily so that callers (and tests)
    # can inject their own index / mapping / encoder.
    index = jobs_index if index is None else index
    mapping = jobs_mapping if mapping is None else mapping
    encoder = model if encoder is None else encoder

    # Embed the CV as a single-row batch.
    cv_emb = encoder.encode([cv_text], convert_to_numpy=True)

    # Normalize the query; a zero-norm row is left untouched instead of being
    # divided by zero, which would send NaNs to the index.
    norms = np.linalg.norm(cv_emb, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    cv_emb = np.ascontiguousarray(cv_emb / norms, dtype=np.float32)

    distances, indices = index.search(cv_emb, top_n)

    results = []
    for score, idx in zip(distances[0], indices[0]):
        idx = int(idx)
        if idx < 0:
            # FAISS pads with label -1 when fewer than top_n neighbours exist;
            # those slots are not real jobs.
            continue
        job_id = mapping.get(str(idx), f"Unknown_{idx}")
        results.append({"job_id": job_id, "score": float(score)})

    return results

In [21]:
# Rank jobs for a CV that is already stored in the resume index, reusing its
# stored vector instead of re-embedding the CV text.
cv_idx = 0

# reconstruct() returns the stored vector; reshape it to a one-row query batch.
cv_emb = np.asarray(resume_index.reconstruct(cv_idx), dtype=np.float32).reshape(1, -1)

# Normalize the query; a zero-norm vector is left untouched rather than
# divided by zero, which would send NaNs to the index.
cv_norm = np.linalg.norm(cv_emb, axis=1, keepdims=True)
cv_norm[cv_norm == 0] = 1.0
cv_emb = cv_emb / cv_norm

top_n = 5
distances, indices = jobs_index.search(cv_emb, top_n)

# FAISS pads with label -1 when fewer than top_n neighbours exist; skip those
# slots, and fall back to an "Unknown_<idx>" label for positions missing from
# the mapping (same convention as find_top_jobs_for_cv) instead of raising.
results = [
    {"job_id": jobs_mapping.get(str(int(idx)), f"Unknown_{idx}"), "score": float(score)}
    for score, idx in zip(distances[0], indices[0])
    if idx >= 0
]

print(f"Top {top_n} jobs pour le CV {resume_mapping[cv_idx]}:")
for r in results:
    print(r)

Top 5 jobs pour le CV 1.txt:
{'job_id': 1445309020311373, 'score': 0.6037259101867676}
{'job_id': 2307363426817404, 'score': 0.5961668491363525}
{'job_id': 457775442040638, 'score': 0.5943095088005066}
{'job_id': 2039602295244238, 'score': 0.593273401260376}
{'job_id': 2337193539465992, 'score': 0.5650455951690674}
