In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
import os
from IPython.display import display


In [2]:
# Data lives in a "data" folder one level above the notebook's working directory.
DATA_DIR = os.path.join(os.path.pardir, "data")
# Embedding files are kept in a subfolder of DATA_DIR.
EMB_DIR = os.path.join(DATA_DIR, "embeddings")

In [8]:
# Load the saved embedding arrays for resumes and jobs from EMB_DIR.
# NOTE(review): row order presumably matches the cleaned CSVs loaded below — confirm.
resume_emb_path = os.path.join(EMB_DIR, "resume_embeddings.npy")
job_emb_path = os.path.join(EMB_DIR, "job_embeddings.npy")

resume_embeddings = np.load(resume_emb_path)
job_embeddings = np.load(job_emb_path)


In [9]:
# Read the cleaned resume and job tables that accompany the embeddings.
resumes_csv = os.path.join(DATA_DIR, "resumes_cleaned.csv")
jobs_csv = os.path.join(DATA_DIR, "jobs_cleaned.csv")

resume_df = pd.read_csv(resumes_csv)
job_df = pd.read_csv(jobs_csv)

In [4]:
# Pairwise cosine similarity: one row per job embedding, one column per resume embedding.
sim_matrix = cosine_similarity(X=job_embeddings, Y=resume_embeddings)


In [None]:
# Number of best-matching resumes to keep per job.
top_k = 5
# argsort is ascending, so sorting the negated scores puts the highest
# similarity first; keep only the first top_k resume positions in each row.
top_indices = (-sim_matrix).argsort(axis=1)[:, :top_k]

# Columns worth showing, restricted to those actually present in resumes_cleaned.csv
display_cols = [
    col
    for col in (
        "Name",
        "Current_Title",
        "YearsExperience",
        "Skills",
        "Location",
        "Resume_clean",
        "Resume",
    )
    if col in resume_df.columns
]

def snippet(text, max_chars=280):
    """Return a shortened preview of ``text`` for display.

    Parameters
    ----------
    text : object
        Value to preview. Anything that is not a ``str`` is returned as-is.
    max_chars : int, default 280
        Strings longer than this are cut to their first ``max_chars`` characters.

    Returns
    -------
    object
        ``text`` unchanged when it is not a string or is at most ``max_chars``
        long; otherwise the cut string, trimmed back to the last space (when
        one exists) and followed by ``" ..."``.
    """
    # Non-strings and strings that already fit pass straight through.
    if not isinstance(text, str) or len(text) <= max_chars:
        return text

    head = text[:max_chars]
    # Cut at the last space so the preview does not end on a partial word.
    last_space = head.rfind(" ")
    if last_space != -1:
        head = head[:last_space]
    return head + " ..."

# Show the top-ranked resumes for the first few jobs as one table per job.
#
# Row i of sim_matrix is looked up positionally in job_df and each column
# index in resume_df, so the three objects must have matching lengths.
# Without this check a mismatch would either raise midway through the loop
# or silently attach scores to the wrong records.
expected_shape = (len(job_df), len(resume_df))
if sim_matrix.shape != expected_shape:
    raise ValueError(
        f"sim_matrix has shape {sim_matrix.shape} but job_df/resume_df have "
        f"{expected_shape[0]}/{expected_shape[1]} rows; "
        "the embeddings and the cleaned CSVs are out of sync"
    )

# Column-existence checks do not change between iterations; evaluate them once.
has_job_title = "Title" in job_df.columns
has_resume_id = "ID" in resume_df.columns

for i, res_idxs in enumerate(top_indices[:3]):  # first 3 jobs
    # Fall back to a positional label when the jobs table has no "Title" column.
    job_title = job_df.iloc[i]["Title"] if has_job_title else f"Job {i}"
    print(f"Job {i}: {job_title}")

    rows = []
    for rank, res_idx in enumerate(res_idxs, 1):
        # Fetch the resume row once and reuse it for the ID and every display column.
        resume_row = resume_df.iloc[res_idx]
        score = float(sim_matrix[i, res_idx])
        row = {
            "rank": rank,
            # Use the positional index as the identifier when there is no "ID" column.
            "ResumeID": resume_row["ID"] if has_resume_id else res_idx,
            "score": score,
        }
        for c in display_cols:
            val = resume_row[c]
            # Show a short preview for long text fields
            if c.lower().startswith("resume"):
                val = snippet(val, 300)
            row[c] = val
        rows.append(row)

    display(pd.DataFrame(rows))
    print()

Job 0: Secretary / office helper
  1. Resume ID 23804341  (0.623)
  2. Resume ID 17857644  (0.613)
  3. Resume ID 16103783  (0.601)
  4. Resume ID 13915715  (0.594)
  5. Resume ID 47470864  (0.593)

Job 1: Cheif Accountant
  1. Resume ID 12338274  (0.703)
  2. Resume ID 22465498  (0.674)
  3. Resume ID 16237710  (0.671)
  4. Resume ID 24103168  (0.665)
  5. Resume ID 53640713  (0.664)

Job 2: Sales representative
  1. Resume ID 21807211  (0.612)
  2. Resume ID 50324968  (0.596)
  3. Resume ID 15535408  (0.587)
  4. Resume ID 36621169  (0.587)
  5. Resume ID 71576860  (0.582)

