In [2]:
import faiss
import dotenv
from dotenv import load_dotenv
import numpy as np
import pandas as pd
import json
from sentence_transformers import SentenceTransformer
import umap.umap_ as umap
import matplotlib.pyplot as plt
import os
import json
from openai import OpenAI

load_dotenv()  # Load environment variables from .env file

client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=os.getenv("OPENAI_API_KEY"))

2025-11-21 11:32:10.601475: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-21 11:32:10.648601: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-21 11:32:12.470006: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [3]:
jobs_index = faiss.read_index("./data/jobs_index.faiss")
with open("./data/jobs_index_mapping.json", "r") as f:
    jobs_mapping = {int(k): v for k, v in json.load(f).items()}

resume_index = faiss.read_index("./data/resume_index.faiss")
with open("./data/resume_index_mapping.json", "r") as f:
    resume_mapping = json.load(f)

model = SentenceTransformer("all-MiniLM-L6-v2")

def find_top_jobs(cv_text, top_n=5):
    cv_emb = model.encode([cv_text], convert_to_numpy=True)
    cv_emb /= np.linalg.norm(cv_emb, axis=1, keepdims=True)
    distances, indices = jobs_index.search(cv_emb, top_n)
    return [{"job_id": jobs_mapping.get(idx, f"Unknown_{idx}"), "score": float(score)}
            for score, idx in zip(distances[0], indices[0])]

def reconstruct_embeddings(index):
    embeddings = np.zeros((index.ntotal, index.d), dtype=np.float32)
    for i in range(index.ntotal):
        index.reconstruct(i, embeddings[i])
    return embeddings

In [4]:
resume_json_folder = "./data/resume_extract_text"
resume_data = {}

for fname in os.listdir(resume_json_folder):
    if fname.lower().endswith(".txt"):
        path = os.path.join(resume_json_folder, fname)
        with open(path, "r", encoding="utf-8") as f:
            data = f.read()
        resume_data[fname] = data

In [7]:
jobs_df = pd.read_csv("./data/job_offer/datasets/ravindrasinghrana/job-description-dataset/versions/1/job_descriptions.csv")

cv_embeddings = reconstruct_embeddings(resume_index)
job_embeddings = reconstruct_embeddings(jobs_index)
all_embeddings = np.vstack([cv_embeddings, job_embeddings])

reducer = umap.UMAP(n_components=2, random_state=42, n_neighbors=15, min_dist=0.1)
all_2d = reducer.fit_transform(all_embeddings)

cv_index = 25
cv_filename = resume_mapping[cv_index]

print("===== CV s√©lectionn√© =====")
if cv_filename in resume_data:
    print(resume_data[cv_filename])

cv_emb_norm = cv_embeddings[cv_index:cv_index+1] / np.linalg.norm(
    cv_embeddings[cv_index:cv_index+1], axis=1, keepdims=True
)

distances, indices = jobs_index.search(cv_emb_norm, 5)

results = [
    {"job_index": int(idx), "job_id": jobs_mapping[int(idx)], "score": float(score)}
    for score, idx in zip(distances[0], indices[0])
]

print("\n===== Top 5 offres d'emploi correspondantes =====")

for r in results:
    job_id = r["job_id"]
    
    print(f"\n--- üíº Offre trouv√©e : {job_id} ---")
    print(f"Score de similarit√© : {r['score']:.4f}")
    
    job_row = jobs_df[jobs_df["Job Id"] == job_id]

    if len(job_row) == 0:
        print("‚ö†Ô∏è Offre non trouv√©e dans jobs_df")
        continue
    
    row = job_row.iloc[0]
    
    fields_to_show = [
        "Job Title", "Company Name", "Location", "Experience",
        "Qualifications", "Skills", "Job Description", "Responsibilities",
        "Benefits", "Work Type", "Salary Range"
    ]
    
    for col in fields_to_show:
        if col in row and not pd.isna(row[col]):
            print(f"{col}: {row[col]}")

  warn(


===== CV s√©lectionn√© =====
Skills: Genetic nutrition background; Adult, adolescent and child nutrition; Use of anthropometric measurements; Proficient speaker of Arabic and English; Microsoft Word; Microsoft Excel; Microsoft PowerPoint; Team leadership; Confident public speaker
Experience: Fitness Attendant at Company Name, City, State; Nutritionist at Company Name, City, State; Trainee in food services and in healthy and therapeutic nutrition at Company Name, City, State
Education: Master of Science: Human Nutrition at University of New Haven; Bachelor of Science: Human Nutrition and Dietetics at University of Jordan
Certifications: First Aid/ CPR/AED (current)
Summary: Highly-motivated Nutritionist with a masters degree seeking a career position in the healthcare field. Goal-oriented and high-achieving professional with advanced knowledge and skills in nutrition. Highly effective at evaluating the nutritional needs of both adults and children with various diseases. Self-starter, en

In [9]:
prompt = f"for the following resume : {resume_data[cv_filename]} analyse its proximity with the 5 jobs descriptions of this dataframe : {jobs_df} and explain for each job why the resume is relevant for it, the strengths of the candidate and the possible weaknesses."

response = client.chat.completions.create(
        model="openai/gpt-5-nano",
        messages=[{"role": "user", "content": prompt}]
    )

print("\n===== Analyse GPT-5 Nano =====")
print(response.choices[0].message.content)


===== Analyse GPT-5 Nano =====
Here is an alignment assessment of the provided Nutritionist resume against the first five job descriptions in the sample dataframe. For each job, I note how close the resume is to the role, the candidate‚Äôs strengths you could leverage, potential gaps, and practical ways to tailor or improve the resume/approach.

Job 0
- Job Title: Digital Marketing Specialist (Social Media Manager)
- Proximity (how well the resume aligns): Low to Moderate
  - Why: This role centers on content creation, campaign management, social media analytics, and digital branding. The resume shows strong communication skills and leadership, but no direct digital marketing, social media management, or analytics experience.
- Strengths the candidate could leverage
  - Excellent bilingual communication (Arabic/English) useful for diverse audiences.
  - Public speaking and client-facing/education experience from nutrition roles.
  - Leadership and teamwork, plus comfort with presentin