In [None]:
#Install Libraries
!pip install sentence-transformers
!pip install pypdf
!pip install numpy
!pip install scikit-learn



In [None]:
#CLONE ENDEE REPOSITORY
!git clone https://github.com/EndeeLabs/endee.git


In [None]:
#ADDING ENDEE TO PYTHON PATH
import sys
# Only add the entry once: without the guard every re-run of this cell would
# append another duplicate of the same directory to sys.path.
if "/content/endee" not in sys.path:
  sys.path.append("/content/endee")


In [None]:
#VERIFYING ENDEE IS AVAILABLE
import os
# Listing the clone target doubles as the check: os.listdir raises if the
# directory does not exist, otherwise its entries are shown as the cell output.
endee_repo_dir = "/content/endee"
os.listdir(endee_repo_dir)


In [None]:
import os
# Show the contents of the directory that the resume-loading cell scans for PDFs.
content_dir = "/content"
os.listdir(content_dir)


In [None]:
#EXTRACTING TEXT FROM RESUMES
# PDF Text Extractor
from pypdf import PdfReader

def extract_text_from_pdf(pdf_path):
  """Return the text of every page of a PDF, concatenated in page order.

  Args:
    pdf_path: Path of the PDF file; passed straight to ``PdfReader``.

  Returns:
    A single string made of each page's extracted text joined with no
    separator. Pages whose extraction yields nothing (an empty string or
    ``None``) are skipped, so the result is ``""`` when no page has text.
  """
  reader = PdfReader(pdf_path)

  page_texts = []
  for page in reader.pages:
    # Extract once and reuse the result: calling extract_text() both in the
    # condition and in the append doubled the extraction work for every page.
    page_text = page.extract_text()
    if page_text:
      page_texts.append(page_text)

  # Joining once at the end avoids repeated string concatenation in the loop.
  return "".join(page_texts)


In [None]:
#Loading All Resume Texts
# resume_texts[i] is the extracted text of the file named resume_names[i].
resume_texts = []
resume_names = []

# os.listdir returns entries in arbitrary order, so sort them to keep the
# record order (and therefore every later listing) the same on each run.
for file_name in sorted(os.listdir("/content")):

  # Compare the extension case-insensitively so files such as "CV.PDF" are
  # not silently skipped.
  if file_name.lower().endswith(".pdf"):

    resume_texts.append(extract_text_from_pdf(os.path.join("/content", file_name)))
    resume_names.append(file_name)

print("Resumes loaded:")
for name in resume_names:

  print("-", name)


In [None]:
# Loading Embedding Model
from sentence_transformers import SentenceTransformer

# Identifier handed to SentenceTransformer to select which model to load.
embedding_model_name = "all-MiniLM-L6-v2"
model = SentenceTransformer(embedding_model_name)


In [None]:
#Converting Resume Texts to Vectors
# Stop with an actionable message when nothing was loaded; otherwise the
# resume_embeddings[0] lookup below fails with an unexplained IndexError.
if not resume_texts:
  raise ValueError(
      "No resume text to embed: upload at least one .pdf file to /content "
      "and re-run the resume-loading cell."
  )

# One embedding per resume, in the same order as resume_texts.
resume_embeddings = model.encode(resume_texts)

print("Number of resumes:", len(resume_embeddings))
print("Embedding vector size:", resume_embeddings[0].shape)


In [None]:
# Build the in-memory resume store: a plain Python list with one dict per
# resume holding its file name, its embedding vector and its extracted text.
# (Nothing from the cloned Endee repository is called in this cell.)
resume_db = [
    {
        "resume_name": resume_names[idx],
        "vector": vector,
        "text": resume_texts[idx],
    }
    for idx, vector in enumerate(resume_embeddings)
]

print("Resumes stored in Endee vector database structure:", len(resume_db))


In [None]:
# Verify Storage
# Spot-check the first stored record: its file name and the shape of its vector.
first_record = resume_db[0]
first_record["resume_name"], first_record["vector"].shape


In [None]:
# JOB DESCRIPTION INPUT + MATCHING (RAG CORE)
# Enter Job Description
# One list item per line of the description; the empty first and last items
# keep the leading and trailing newline of the text.
job_description = "\n".join([
    "",
    "Looking for an AI Engineer with strong Python skills,",
    "Machine Learning, Deep Learning, NLP,",
    "and experience in data analysis and model deployment.",
    "",
])


In [None]:
# Converting Job Description to Vector
# The description is passed as a one-element list; the matching cell later
# hands job_vector to cosine_similarity unchanged.
job_batch = [job_description]
job_vector = model.encode(job_batch)
print("Job vector shape:", job_vector.shape)


In [None]:
# Match Job with Resumes (Semantic Similarity)
from sklearn.metrics.pairwise import cosine_similarity

# One entry per stored resume: its file name plus its cosine similarity to the
# job vector. Entries keep the order of resume_db.
scores = [
    {
        "resume_name": entry["resume_name"],
        # The resume vector is wrapped in a list to form a single-row input;
        # [0][0] pulls the lone value out of the returned matrix.
        "score": cosine_similarity([entry["vector"]], job_vector)[0][0],
    }
    for entry in resume_db
]


In [None]:
# Rank Resumes
# Best match first: copy the scores, then sort the copy by descending score
# (the original scores list is left untouched).
ranked_resumes = list(scores)
ranked_resumes.sort(key=lambda entry: entry["score"], reverse=True)

print("===== RESUME RANKING FOR JOB ROLE =====\n")

for position, candidate in enumerate(ranked_resumes, start=1):
  # Similarity expressed as a percentage, rounded to two decimals.
  match_percent = round(candidate["score"] * 100, 2)
  print(f"{position}. {candidate['resume_name']}")
  print(f"   Match Score: {match_percent}%\n")



In [None]:
# SKILL GAP ANALYSIS
# Skills the job requires. They are written in lowercase because the analysis
# cell looks for them inside lowercased resume text.
required_skills = [
    "python", "machine learning", "deep learning",
    "nlp", "data analysis", "model deployment",
]



In [None]:
# Analyze Each Resume
# For every stored resume, report which required skills appear in its text and
# which do not. Matching is a plain substring test on normalised text.
print("===== SKILL GAP ANALYSIS =====\n")

for record in resume_db:

  # Lowercase the text and collapse every run of whitespace (including the
  # line breaks PDF extraction leaves inside phrases) to a single space, so a
  # multi-word skill such as "machine learning" still matches when it wraps
  # across lines or is separated by several spaces.
  resume_text = " ".join(record["text"].lower().split())

  present_skills = [skill for skill in required_skills if skill in resume_text]
  # Everything not found is missing; reuse the result instead of re-scanning.
  missing_skills = [skill for skill in required_skills if skill not in present_skills]

  print(f"Resume: {record['resume_name']}")
  print("  Strong Skills :", present_skills)
  print("  Missing Skills:", missing_skills)
  print()
