In [22]:
# -*- coding: utf-8 -*-
from __future__ import annotations

import re
import unicodedata
import numpy as np
import pandas as pd

from typing import List, Dict, Any
from sentence_transformers import SentenceTransformer, util

_model_cache: SentenceTransformer | None = None

def _get_model() -> SentenceTransformer:
    global _model_cache
    if _model_cache is None:
        _model_cache = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
    return _model_cache

def _n(s: str) -> str:
    s = s.lower().strip()
    s = unicodedata.normalize("NFKD", s)
    s = re.sub(r"[^a-z0-9.+\s]", " ", s)
    return " ".join(s.split())

def compare_skills(cv: List[str], jd: List[str], *, threshold: float = 0.6) -> Dict[str, Any]:
    """Score how many job-description skills are covered by a CV's skills.

    Each label is normalised with ``_n`` and embedded with the model returned
    by ``_get_model``. A JD skill counts as matched when its cosine similarity
    to at least one CV skill is ``>= threshold``.

    Args:
        cv: Skill labels taken from the CV (columns of the result matrices).
        jd: Skill labels required by the job description (rows).
        threshold: Minimum cosine similarity for a pair to count as a match.

    Returns:
        A dict with:
          * ``"similarity"``: DataFrame of cosine similarities, indexed by the
            original ``jd`` labels with the original ``cv`` labels as columns.
          * ``"binary"``: DataFrame of the same shape holding int8 1/0 flags
            for ``similarity >= threshold``.
          * ``"matches"``: boolean NumPy array, one entry per JD skill.
          * ``"score"``: number of matched JD skills (int).
          * ``"score_percent"``: ``score`` as a percentage of ``len(jd)``
            rounded to two decimals, or ``0.0`` when ``jd`` is empty.
    """
    # Materialise once so one-shot iterables can be reused as frame labels.
    cv = list(cv)
    jd = list(jd)

    if not cv or not jd:
        # With an empty side there is nothing to embed or compare. Skip the
        # model and return a correctly shaped, all-zero result so that an
        # empty CV can never be reported as matching a requirement.
        sim = np.zeros((len(jd), len(cv)), dtype=np.float32)
    else:
        model = _get_model()
        cv_emb = model.encode([_n(x) for x in cv], convert_to_tensor=True)
        jd_emb = model.encode([_n(x) for x in jd], convert_to_tensor=True)
        # Rows follow jd, columns follow cv.
        sim = util.cos_sim(jd_emb, cv_emb).cpu().numpy()

    hits = sim >= threshold
    sim_df = pd.DataFrame(sim, index=jd, columns=cv)
    bin_df = pd.DataFrame(hits.astype(np.int8), index=jd, columns=cv)

    # any() over zero columns yields False, unlike max() which yields NaN.
    matches = hits.any(axis=1)
    score = int(matches.sum())
    score_percent = round(score / len(jd) * 100, 2) if jd else 0.0

    return {
        "similarity": sim_df,
        "binary": bin_df,
        "matches": matches,
        "score": score,
        "score_percent": score_percent
    }

In [23]:
if __name__ == "__main__":
    # Demo input: skills extracted from a candidate's CV.
    cv_skills = [
        "Python",
        "SQL",
        "Docker",
        "TensorFlow",
        "Data Analysis",
    ]
    # Demo input: skills requested by the job description.
    jd_skills = [
        "Python Programming",
        "Machine Learning",
        "Docker",
    ]

    result = compare_skills(cv=cv_skills, jd=jd_skills, threshold=0.6)

    # Full similarity matrix first, then the thresholded 0/1 view of it.
    print("Cosine Similarity Matrix:\n", result["similarity"])
    print("\nBinary Match Matrix (1 = match):\n", result["binary"])

    # Summary figures: matched JD skill count and the same value as a percentage.
    print("\nMatched JD Skills:", int(result["score"]))
    print("Skill Match Score (%):", result["score_percent"])

Cosine Similarity Matrix:
                       Python       SQL    Docker  TensorFlow  Data Analysis
Python Programming  0.788405  0.295026  0.134603    0.299467       0.358885
Machine Learning    0.384568  0.315531  0.147993    0.408608       0.484847
Docker              0.180659  0.077535  1.000000    0.261846       0.094295

Binary Match Matrix (1 = match):
                     Python  SQL  Docker  TensorFlow  Data Analysis
Python Programming       1    0       0           0              0
Machine Learning         0    0       0           0              0
Docker                   0    0       1           0              0

Matched JD Skills: 2
Skill Match Score (%): 66.67
