In [49]:
import faiss
import numpy as np
import pandas as pd
import json
from sentence_transformers import SentenceTransformer
import umap.umap_ as umap
import matplotlib.pyplot as plt
import os
import json

In [50]:
# Load the prebuilt FAISS index of job postings.
jobs_index = faiss.read_index("./data/jobs_index.faiss")
# JSON object keys are always strings; convert them to int so the mapping can
# be looked up directly with the integer labels returned by a FAISS search.
# The encoding is given explicitly (as for the resume text files) so the
# result does not depend on the platform's default locale encoding.
with open("./data/jobs_index_mapping.json", "r", encoding="utf-8") as f:
    jobs_mapping = {int(k): v for k, v in json.load(f).items()}

# Load the prebuilt FAISS index of resumes.
resume_index = faiss.read_index("./data/resume_index.faiss")
# Kept exactly as stored in the file (no key conversion).
# NOTE(review): the notebook indexes this with an integer position, so the
# file presumably holds a JSON list rather than an object -- confirm.
with open("./data/resume_index_mapping.json", "r", encoding="utf-8") as f:
    resume_mapping = json.load(f)

# Sentence-embedding model used to encode free-text CVs in find_top_jobs.
model = SentenceTransformer("all-MiniLM-L6-v2")

def find_top_jobs(cv_text, top_n=5):
    """Return the job postings most similar to a free-text CV.

    The text is encoded with the module-level ``model``, L2-normalised and
    searched against the module-level ``jobs_index``.

    Args:
        cv_text: Raw text of the CV to match.
        top_n: Maximum number of matches to request from the index.

    Returns:
        A list of ``{"job_id": ..., "score": float}`` dicts, in the order
        FAISS returns them. A label with no entry in ``jobs_mapping`` is
        reported as ``"Unknown_<label>"``. The list is shorter than ``top_n``
        when the index holds fewer vectors, because the ``-1`` placeholders
        FAISS uses for missing results are dropped.
    """
    cv_emb = model.encode([cv_text], convert_to_numpy=True)
    norms = np.linalg.norm(cv_emb, axis=1, keepdims=True)
    # Guard against a zero vector: dividing by 0 would send NaNs to the search.
    norms[norms == 0] = 1.0
    cv_emb = cv_emb / norms
    distances, indices = jobs_index.search(cv_emb, top_n)
    return [
        {"job_id": jobs_mapping.get(int(idx), f"Unknown_{idx}"), "score": float(score)}
        for score, idx in zip(distances[0], indices[0])
        if idx >= 0  # -1 marks "no result" padding, not a real match
    ]

def reconstruct_embeddings(index):
    """Return every vector stored in a FAISS index as a single array.

    Args:
        index: A FAISS index exposing ``ntotal``, ``d`` and ``reconstruct_n``.

    Returns:
        A float32 array of shape ``(index.ntotal, index.d)`` whose row ``i``
        is the vector reconstructed for position ``i``. An empty index yields
        an array of shape ``(0, index.d)``.
    """
    n_vectors = index.ntotal
    if n_vectors == 0:
        # Nothing to reconstruct; keep the (0, d) shape callers can stack.
        return np.zeros((0, index.d), dtype=np.float32)
    # One batched call instead of one Python-level reconstruct() per vector.
    return index.reconstruct_n(0, n_vectors)

In [51]:
# Directory scanned for extracted resume text files.
resume_json_folder = "./data/resume_extract_text"

# Maps each ``.txt`` file name (case-insensitive extension match) to the
# full text content of that file.
resume_data = {}
for entry in os.listdir(resume_json_folder):
    if not entry.lower().endswith(".txt"):
        continue
    with open(os.path.join(resume_json_folder, entry), "r", encoding="utf-8") as handle:
        resume_data[entry] = handle.read()

In [52]:
# Table of job postings, read from the job descriptions CSV.
jobs_df = pd.read_csv("./data/job_offer/job_descriptions.csv")

# Pull the stored vectors back out of both indexes and stack them row-wise,
# CV vectors first, then job vectors.
cv_embeddings = reconstruct_embeddings(resume_index)
job_embeddings = reconstruct_embeddings(jobs_index)
all_embeddings = np.concatenate((cv_embeddings, job_embeddings), axis=0)

# Fit a 2-component UMAP projection on the combined vectors with a fixed seed.
reducer = umap.UMAP(
    n_neighbors=15,
    min_dist=0.1,
    n_components=2,
    random_state=42,
)
all_2d = reducer.fit_transform(all_embeddings)

# Position of the CV to inspect; used to index both resume_mapping and the
# rows of cv_embeddings.
cv_index = 25
cv_filename = resume_mapping[cv_index]

print("===== CV sélectionné =====")
if cv_filename in resume_data:
    print(resume_data[cv_filename])
else:
    # Say so explicitly instead of printing an empty section.
    print(f"(texte introuvable pour {cv_filename})")

# L2-normalise the stored CV vector (kept 2-D, shape (1, d)) before searching.
cv_vector = cv_embeddings[cv_index:cv_index + 1]
cv_norm = np.linalg.norm(cv_vector, axis=1, keepdims=True)
# Guard against a zero vector: dividing by 0 would send NaNs to the search.
cv_norm[cv_norm == 0] = 1.0
cv_emb_norm = cv_vector / cv_norm

distances, indices = jobs_index.search(cv_emb_norm, 5)

# Same fallback as find_top_jobs: an unmapped label becomes "Unknown_<label>"
# rather than raising KeyError, and the -1 placeholders FAISS uses when it has
# fewer results than requested are dropped.
results = [
    {
        "job_index": int(idx),
        "job_id": jobs_mapping.get(int(idx), f"Unknown_{idx}"),
        "score": float(score),
    }
    for score, idx in zip(distances[0], indices[0])
    if idx >= 0
]

# Columns of jobs_df printed for each match, in display order. Defined once
# here rather than being rebuilt on every loop iteration.
fields_to_show = [
    "Job Title", "Company Name", "Location", "Experience",
    "Qualifications", "Skills", "Job Description", "Responsibilities",
    "Benefits", "Work Type", "Salary Range"
]

print("\n===== Top 5 offres d'emploi correspondantes =====")

for r in results:
    job_id = r["job_id"]

    print(f"\n--- 💼 Offre trouvée : {job_id} ---")
    print(f"Score de similarité : {r['score']:.4f}")

    # NOTE(review): this equality match assumes the "Job Id" column and the
    # ids stored in jobs_mapping share the same type -- confirm.
    job_row = jobs_df[jobs_df["Job Id"] == job_id]

    if len(job_row) == 0:
        print("⚠️ Offre non trouvée dans jobs_df")
        continue

    # Several rows may match; only the first one is displayed.
    row = job_row.iloc[0]

    # Skip columns that are absent from the CSV or empty for this posting.
    for col in fields_to_show:
        if col in row and not pd.isna(row[col]):
            print(f"{col}: {row[col]}")

  warn(


===== CV sélectionné =====
Skills: Genetic nutrition background; Adult, adolescent and child nutrition; Use of anthropometric measurements; Proficient speaker of Arabic and English; Microsoft Word; Microsoft Excel; Microsoft PowerPoint; Team leadership; Confident public speaker
Experience: Fitness Attendant at Company Name, City, State; Nutritionist at Company Name, City, State; Trainee in food services and in healthy and therapeutic nutrition at Company Name, City, State
Education: Master of Science: Human Nutrition at University of New Haven; Bachelor of Science: Human Nutrition and Dietetics at University of Jordan
Certifications: First Aid/ CPR/AED (current)
Summary: Highly-motivated Nutritionist with a masters degree seeking a career position in the healthcare field. Goal-oriented and high-achieving professional with advanced knowledge and skills in nutrition. Highly effective at evaluating the nutritional needs of both adults and children with various diseases. Self-starter, en