In [2]:
import os
import fitz  # PyMuPDF
import pandas as pd
from sentence_transformers import SentenceTransformer, util


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read the complete job description from disk (UTF-8 text file)
with open("Job_Description.txt", mode="r", encoding="utf-8") as jd_file:
    job_description = jd_file.read()

# Sanity check: show only the leading 300 characters
print(job_description[0:300])


We are seeking a Machine Learning Engineer with experience in Python, TensorFlow, and natural language processing. The candidate should have knowledge of resume parsing, scoring systems, and data preprocessing. Strong communication and collaboration skills are a plus.



In [9]:
import os

# Directory that holds the candidate resumes
resume_folder = "Sample_Resumes"

# Two fresh, independent lists: one for the extracted resume texts and one for
# the corresponding filenames
resumes, resume_names = [], []

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        str: The ``get_text()`` output of all pages joined with no separator,
        or an empty string when the document has no pages.
    """
    import fitz  # PyMuPDF

    # Use the document as a context manager so it is closed (and its file
    # handle released) even if extracting a page raises.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)

# Extract the text of every PDF in the resume folder.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep `resumes` / `resume_names` in the same order on every run and machine.
for filename in sorted(os.listdir(resume_folder)):
    # Case-insensitive extension check; anything that is not a PDF is skipped
    if not filename.lower().endswith(".pdf"):
        continue
    pdf_path = os.path.join(resume_folder, filename)
    text = extract_text_from_pdf(pdf_path)
    # Appended together so index i of both lists refers to the same file
    resumes.append(text)
    resume_names.append(filename)

print(f"✅ Extracted text from {len(resumes)} resumes.")


✅ Extracted text from 2 resumes.


In [10]:
# Print each filename followed by the first 500 characters of its extracted text
for resume_file, resume_body in zip(resume_names, resumes):
    snippet = resume_body[:500]
    print(f"\n--- {resume_file} ---\n{snippet}...\n")



--- resume1.pdf ---
Resume
Name: Alice Johnson
Role: Machine Learning Engineer
Skills: Python, TensorFlow, Scikit-learn, SQL
Experience:
Worked on end-to-end ML pipelines. Built classification models for customer churn. Optimized
training time using GPU acceleration.
Page 1
...


--- resume2.pdf ---
Resume
Name: Bob Smith
Role: Data Scientist
Skills: Pandas, NumPy, Matplotlib, NLP, PyTorch
Experience:
Developed sentiment analysis tools. Created dashboards with data visualizations. Collaborated with
engineering team for deployment.
Page 1
...



In [11]:
from sentence_transformers import SentenceTransformer, util
import pandas as pd

# Pre-trained sentence-embedding model
# (other checkpoints such as 'paraphrase-MiniLM-L6-v2' can be swapped in here)
model = SentenceTransformer("all-MiniLM-L6-v2")

# Read the job description text from disk
with open("Job_Description.txt", mode="r", encoding="utf-8") as jd_file:
    job_description = jd_file.read()

# Embed the job description once, as a tensor, for comparison against each resume
job_embedding = model.encode(job_description, convert_to_tensor=True)

# For every resume: embed its text, compare it with the job-description
# embedding via cosine similarity, and keep the result as a plain number
scores = [
    util.pytorch_cos_sim(
        job_embedding,
        model.encode(resume_text, convert_to_tensor=True),
    ).item()
    for resume_text in resumes
]

# Pair each resume filename with its score
results_df = pd.DataFrame({"Resume": resume_names, "Score": scores})

# Order the table from highest score (most relevant) to lowest
results_df = results_df.sort_values(by="Score", ascending=False)

# Write the ranking to CSV without the index column
results_df.to_csv("Score_Output.csv", index=False)

# Show the ranking
print("✅ Ranking Complete! Here are the scores:")
print(results_df)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


✅ Ranking Complete! Here are the scores:
        Resume     Score
0  resume1.pdf  0.687633
1  resume2.pdf  0.560680
