In [2]:
!pip install sentence-transformers scikit-learn numpy

Collecting sentence-transformers
  Using cached sentence_transformers-5.2.0-py3-none-any.whl.metadata (16 kB)
Using cached sentence_transformers-5.2.0-py3-none-any.whl (493 kB)
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-5.2.0



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

class ResumeMatcher:
    """Ranks resume texts against a job description.

    Each text is embedded with a pre-trained sentence-transformer model and
    compared to the job description embedding using cosine similarity.
    """

    def __init__(self):
        """Load the 'all-MiniLM-L6-v2' sentence-transformer model.

        Prints a short status line before loading the model.
        """
        # Load a pre-trained BERT model (Small, fast, and powerful)
        # using 'all-MiniLM-L6-v2' which is optimized for semantic search
        print("Loading AI Model (all-MiniLM-L6-v2)...")
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

    def get_embedding(self, text):
        """Converts text into a 384-dimensional dense vector.

        Args:
            text: The string to embed.

        Returns:
            Whatever ``self.model.encode(text)`` returns; ``match`` relies on
            it being a 1-D NumPy array (it calls ``.reshape(1, -1)`` on it).
        """
        return self.model.encode(text)

    def match(self, job_description, resumes):
        """Score every resume against the job description.

        Compares the JD vector against each resume vector using cosine
        similarity and prints a status line before processing.

        Args:
            job_description: Job description text.
            resumes: Mapping of candidate name -> resume text.

        Returns:
            A list of tuples ``(candidate_name, match_percentage)`` sorted by
            percentage, highest first. ``match_percentage`` is a built-in
            ``float`` rounded to two decimals. Candidates with equal scores
            keep the iteration order of ``resumes`` (the sort is stable).
        """
        print("\nProcessing Semantic Matching...")

        # 1. Vectorize the Job Description
        # Reshape is needed for sklearn cosine_similarity (expects 2D array)
        jd_vector = self.get_embedding(job_description).reshape(1, -1)

        results = []

        # 2. Vectorize and Compare each Resume
        for person, text in resumes.items():
            resume_vector = self.get_embedding(text).reshape(1, -1)

            # Math: Calculate Cosine Similarity (Result is between -1 and 1)
            similarity_score = cosine_similarity(jd_vector, resume_vector)[0][0]

            # Convert to percentage. The similarity is a NumPy scalar
            # (float32 for float32 embeddings); rounding a float32 keeps it
            # float32, which then prints as e.g. 67.08000183105469. Converting
            # to a built-in float first makes round(..., 2) yield a clean 67.08.
            percentage = round(float(similarity_score) * 100, 2)
            results.append((person, percentage))

        # 3. Sort by highest match percentage
        results.sort(key=lambda x: x[1], reverse=True)
        return results

# --- MOCK DATA ---
# Sample job description. It is spelled out line by line so the leading and
# trailing newlines, and the trailing space on the first sentence, are visible.
job_desc = (
    "\n"
    "We are looking for a Python Developer with AI experience. \n"
    "Must know Machine Learning, Scikit-Learn, and Vector Databases.\n"
    "Experience with API development (Flask/FastAPI) is a plus.\n"
)

# Sample resumes keyed by candidate label; insertion order is kept as-is.
candidates = {
    "Candidate A (Web Dev)": (
        "I am a React developer. I know JavaScript, CSS, HTML, and Node.js. "
        "I build front-end websites."
    ),
    "Candidate B (Junior ML)": (
        "I know Python and Pandas. I have built some basic regression models. "
        "I am learning AI."
    ),
    "Candidate C (Your Profile)": (
        "I am an M.Tech scholar in AI. "
        "Expert in Python, Scikit-Learn, and Vector Search (Pinecone). "
        "I have built APIs using Flask."
    ),
}

# --- EXECUTION ---
# Build the matcher, rank the mock candidates against the mock job
# description, and print a small text report with a bar per candidate.
if __name__ == "__main__":
    matcher = ResumeMatcher()
    ranked_candidates = matcher.match(job_desc, candidates)

    print(f"\nJOB DESCRIPTION: {job_desc.strip()}")
    print("-" * 50)
    print("RANKING CANDIDATES:")

    for rank, (name, score) in enumerate(ranked_candidates, 1):
        # One bar segment per 5 percentage points (a non-positive count
        # simply yields an empty bar).
        bar = "█" * int(score / 5)
        # Fixed two-decimal formatting: without a format spec a NumPy float32
        # score is printed at full precision (e.g. 67.08000183105469).
        print(f"{rank}. {name} : {score:.2f}% Match \t|{bar}")

  from .autonotebook import tqdm as notebook_tqdm


Loading AI Model (all-MiniLM-L6-v2)...

Processing Semantic Matching...

JOB DESCRIPTION: We are looking for a Python Developer with AI experience. 
Must know Machine Learning, Scikit-Learn, and Vector Databases.
Experience with API development (Flask/FastAPI) is a plus.
--------------------------------------------------
RANKING CANDIDATES:
1. Candidate C (Your Profile) : 67.08000183105469% Match 	|█████████████
2. Candidate B (Junior ML) : 62.810001373291016% Match 	|████████████
3. Candidate A (Web Dev) : 33.63999938964844% Match 	|██████
