<a href="https://colab.research.google.com/github/Nuthan10/ATS/blob/main/ATS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import tensorflow_hub as hub
import tensorflow_text  # needed for some USE models
from sklearn.metrics.pairwise import cosine_similarity
import PyPDF2

# 2) Helpers to pull text out of PDF/TXT resumes & JDs
def extract_text_from_pdf(path):
    """Return the text of every page of the PDF at *path* as one string.

    Pages for which ``extract_text()`` returns nothing (``None`` or an empty
    string) are skipped. The remaining page texts are joined with a newline
    so the last word of one page is not fused with the first word of the
    next; the callers in this file turn newlines into spaces afterwards.

    Args:
        path: Filesystem path of the PDF file.

    Returns:
        The joined page text, or ``""`` when no page yields any text.
    """
    with open(path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # Keep extraction inside the ``with`` block, as before, so the file
        # handle is still open while the pages are being read.
        page_texts = [page.extract_text() for page in reader.pages]
    return "\n".join(t for t in page_texts if t)

def load_documents(folder):
    """Load every ``.pdf`` and ``.txt`` file in *folder* as flattened text.

    Entries are visited in sorted filename order and the extension check is
    case-insensitive. PDFs go through ``extract_text_from_pdf``; text files
    are read as UTF-8. Newlines in the resulting text are replaced by spaces.
    Entries that are not regular files (e.g. a sub-directory whose name
    happens to end in ``.txt``) are skipped.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        List of ``(filename, text)`` tuples, one per matching file.
    """
    docs = []
    for fn in sorted(os.listdir(folder)):
        path = os.path.join(folder, fn)
        if not os.path.isfile(path):
            continue
        lowered = fn.lower()
        if lowered.endswith(".pdf"):
            txt = extract_text_from_pdf(path)
        elif lowered.endswith(".txt"):
            # Context manager so the handle is closed deterministically.
            with open(path, "r", encoding="utf-8") as f:
                txt = f.read()
        else:
            continue
        docs.append((fn, txt.replace("\n", " ")))
    return docs

# 3) Fetch the Universal Sentence Encoder from TensorFlow Hub
#    (the first run may take ~30s)
_USE_MODEL_URL = "https://tfhub.dev/google/universal-sentence-encoder/4"

print("Loading Universal Sentence Encoder…")
use = hub.load(_USE_MODEL_URL)

def get_use_embeddings(texts):
    """Embed *texts* with the module-level ``use`` model.

    Calls ``use`` on *texts* and converts the result with ``.numpy()``.
    ``texts`` is a list of str; the result is an np.array of shape
    (len(texts), embed_dim).
    """
    encoded = use(texts)
    return encoded.numpy()

# 4) Rank resumes by cosine similarity against JD
def rank_resumes(resumes, jd_text):
    """Rank resumes by cosine similarity between their embedding and the JD's.

    Args:
        resumes: List of ``(filename, text)`` pairs.
        jd_text: Job-description text.

    Returns:
        List of ``(filename, similarity)`` pairs sorted by similarity,
        highest first. An empty list when *resumes* is empty.
    """
    if not resumes:
        # Nothing to rank; without this guard cosine_similarity would be
        # handed a resume matrix with zero rows.
        return []

    filenames = [fn for fn, _ in resumes]

    # Embed the JD and all resumes in a single batch for speed; row 0 is the
    # JD, rows 1.. are the resumes in their original order.
    all_texts = [jd_text] + [txt for _, txt in resumes]
    embeddings = get_use_embeddings(all_texts)
    jd_emb = embeddings[0:1]        # slice (not index) keeps it 2-D
    resume_embs = embeddings[1:]

    sims = cosine_similarity(jd_emb, resume_embs)[0]  # one score per resume
    scored = list(zip(filenames, sims))
    return sorted(scored, key=lambda x: x[1], reverse=True)

# 5) Main driver
def main(resume_folder, jd_file):
    """Rank every resume in *resume_folder* against the JD and print scores.

    Args:
        resume_folder: Directory passed to ``load_documents``.
        jd_file: Path of the job description. Read through
            ``extract_text_from_pdf`` when the name ends in ``.pdf``
            (case-insensitive), otherwise read as UTF-8 text.

    Prints one ``filename: NN.NN%`` line per resume, best match first, or a
    short notice when the folder yields no resumes.
    """
    resumes = load_documents(resume_folder)
    if not resumes:
        # Report the empty folder instead of failing later during ranking.
        print(f"No .pdf or .txt resumes found in {resume_folder}")
        return

    if jd_file.lower().endswith(".pdf"):
        jd = extract_text_from_pdf(jd_file)
    else:
        # Context manager so the handle is closed deterministically.
        with open(jd_file, "r", encoding="utf-8") as f:
            jd = f.read()
    jd = jd.replace("\n", " ")

    print("Computing embeddings and similarities…")
    ranked = rank_resumes(resumes, jd)

    print("\n=== Resume Rankings ===")
    for fn, score in ranked:
        pct = score * 100  # similarity expressed as a percentage
        print(f"{fn}: {pct:.2f}%")

# Paths to the resume folder and the JD file; replace both with your own.
# NOTE(review): these look like Colab Google Drive paths, but no drive-mount
# step is visible in this file — confirm Drive is mounted before running.
resume_folder = "/content/drive/MyDrive/ATS/resumes"  # folder scanned for .pdf/.txt resumes
jd_file = "/content/drive/MyDrive/ATS/job_description.txt"  # job description (.pdf or UTF-8 text)

# Run the ranking and print the results.
main(resume_folder, jd_file)

In [None]:
# Installs the PyPDF2 package that the code cell above imports; on a fresh
# runtime, run this cell before that one.
!pip install PyPDF2