<a href="https://colab.research.google.com/github/MeRitisha/EdunetInternship/blob/main/AI_powered_Resume_Screening_and_Ranking_System_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

!pip install spacy transformers torch pandas scikit-learn PyPDF2 python-docx
!python -m spacy download en_core_web_sm

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx, PyPDF2
Successfully installed PyPDF2-3.0.1 python-docx-1.1.2
Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now

In [None]:
import pandas as pd
import spacy
import torch
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity
from google.colab import files
import PyPDF2
import docx

In [None]:
# Single source of truth for the checkpoint so the tokenizer and the encoder
# can never be loaded from two different models by accident.
BERT_MODEL_NAME = 'bert-base-uncased'

# Load BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
model = BertModel.from_pretrained(BERT_MODEL_NAME)
# The model is only used for inference here; make evaluation mode explicit so
# dropout stays disabled and embeddings are repeatable.
model.eval()

# Load spaCy for preprocessing
nlp = spacy.load("en_core_web_sm")

In [None]:
def preprocess(text):
    """
    Normalize free text into a space-separated string of lowercase lemmas.

    The text is run through the module-level spaCy pipeline ``nlp``. Stop
    words, punctuation and whitespace-only tokens (for example the newlines
    in a triple-quoted string or in extracted document text) are dropped.

    Args:
        text: Raw text to normalize. ``None`` or an empty string yields ``""``.

    Returns:
        str: The surviving lemmas, lowercased and joined by single spaces.
    """
    if not text:
        return ""
    doc = nlp(text)
    tokens = [
        token.lemma_.lower()
        for token in doc
        # Whitespace tokens are neither stop words nor punctuation, so they
        # must be excluded explicitly or they end up in the joined output.
        if not (token.is_stop or token.is_punct or token.is_space)
    ]
    return " ".join(tokens)

In [None]:
def get_bert_embeddings(text):
    """
    Encode text with the module-level BERT ``tokenizer`` and ``model``.

    The input is tokenized to PyTorch tensors (truncated to at most 512
    tokens), passed through the model without gradient tracking, and the
    hidden state at sequence position 0 (the [CLS] position) is taken as the
    embedding.

    Args:
        text: Text to embed; forwarded to ``tokenizer`` unchanged.

    Returns:
        numpy.ndarray: The position-0 hidden state with singleton dimensions
        squeezed out.
    """
    encoded = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512,
    )
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        model_output = model(**encoded)
    cls_state = model_output.last_hidden_state[:, 0, :]
    return cls_state.squeeze().numpy()

In [None]:
def extract_text_from_pdf(file):
    """
    Extract the text of every page of a PDF file.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts (this notebook passes a
            file path).

    Returns:
        str: The text of each page, in page order, joined with newlines so
        that the last word of one page is not fused with the first word of
        the next. Pages that yield no text contribute an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # `or ""` keeps a page with no extractable text (e.g. a scanned image)
    # from breaking the join if the extractor hands back None.
    page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    return "\n".join(page_texts)

def extract_text_from_docx(file):
    """
    Extract the paragraph text of a DOCX file.

    Args:
        file: Whatever ``docx.Document`` accepts (this notebook passes a file
            path).

    Returns:
        str: The text of each paragraph in document order, every paragraph
        followed by a newline. Only ``Document.paragraphs`` is read.
    """
    document = docx.Document(file)
    return "".join(f"{para.text}\n" for para in document.paragraphs)

In [None]:
def compare_resume_with_job_description(resume_text, job_description):
    """
    Score a resume against a job description and report missing terms.

    Both texts are normalized with ``preprocess`` and embedded with
    ``get_bert_embeddings``; the score is the cosine similarity of the two
    embeddings. Feedback is a plain vocabulary diff: every preprocessed
    job-description token that does not occur in the preprocessed resume is
    reported as missing (no weighting or keyword selection is applied).

    Args:
        resume_text: Raw resume text.
        job_description: Raw job-description text.

    Returns:
        tuple: ``(similarity, feedback)`` where ``similarity`` is the cosine
        similarity score and ``feedback`` is a one-element list containing
        either the alphabetically sorted missing keywords or a success
        message.
    """
    # Preprocess text
    processed_resume = preprocess(resume_text)
    processed_job_description = preprocess(job_description)

    # Get BERT embeddings
    resume_embedding = get_bert_embeddings(processed_resume)
    job_description_embedding = get_bert_embeddings(processed_job_description)

    # cosine_similarity takes 2-D inputs and returns a matrix; with one row
    # on each side the score is the single entry at [0][0].
    similarity = cosine_similarity([resume_embedding], [job_description_embedding])[0][0]

    # Provide feedback
    job_keywords = set(processed_job_description.split())
    resume_keywords = set(processed_resume.split())
    # Set iteration order for strings varies between interpreter runs; sort
    # so the same inputs always produce the same feedback line.
    missing_keywords = sorted(job_keywords - resume_keywords)

    if missing_keywords:
        feedback = [f"**Missing Keywords:** {', '.join(missing_keywords)}"]
    else:
        feedback = ["**Great job!** Your resume includes all the important keywords."]

    return similarity, feedback

In [None]:
# Smoke test of the comparison pipeline on hard-coded text (no file upload needed).
# Sample Resume
resume_text = """
Experienced marketing professional with 5 years of experience in digital marketing and SEO.
Proficient in Google Analytics, SEMrush, and social media marketing. Strong analytical skills and a passion for data-driven decision-making.
"""

# Sample Job Description (Marketing Role)
job_description = """
We are looking for a Marketing Specialist with experience in digital marketing and SEO.
The ideal candidate should have hands-on experience with Google Analytics, SEMrush, and social media marketing.
Strong analytical skills and a passion for data-driven decision-making are a plus.
"""

# Compare resume with job description.
# The function returns a (similarity score, list of feedback strings) pair.
similarity, feedback = compare_resume_with_job_description(resume_text, job_description)
# Show the score rounded to four decimal places.
print(f"Similarity Score: {similarity:.4f}")

# Display feedback, one message per line
for item in feedback:
    print(item)

Similarity Score: 1.0000
**Great job!** Your resume includes all the important keywords.


In [None]:
# Upload resume through the Colab file picker. The result is keyed by file
# name; the extraction helpers below are given that name as a path.
uploaded = files.upload()
if not uploaded:
    # The picker was dismissed without choosing a file.
    raise ValueError("No file was uploaded. Please upload a PDF or DOCX file.")
# Only the first uploaded file is scored.
file_name = next(iter(uploaded))

# Extract text from the uploaded file. The extension is compared
# case-insensitively so "Resume.PDF" is accepted as well.
file_name_lower = file_name.lower()
if file_name_lower.endswith('.pdf'):
    resume_text = extract_text_from_pdf(file_name)
elif file_name_lower.endswith('.docx'):
    resume_text = extract_text_from_docx(file_name)
else:
    raise ValueError("Unsupported file format. Please upload a PDF or DOCX file.")

# Sample Job Description
job_description = """
We are looking for a software engineer with experience in Python and machine learning.
The ideal candidate should have hands-on experience with TensorFlow, PyTorch, and cloud platforms like AWS.
Strong problem-solving skills and a passion for AI are a plus.
"""

# Sample Company Description
company_description = """
Our company is a leading innovator in AI and cloud computing. We value creativity, collaboration, and a passion for technology.
We are committed to building a diverse and inclusive workplace where everyone can thrive.
"""

# Compare resume with job and company descriptions.
# The notebook defines no `compare_resume_with_descriptions`, so calling it
# fails on a fresh kernel; score each description with the comparison function
# that is defined above. It returns (similarity, feedback); only the similarity
# is used here.
job_similarity = compare_resume_with_job_description(resume_text, job_description)[0]
company_similarity = compare_resume_with_job_description(resume_text, company_description)[0]
print(f"Job Description Similarity Score: {job_similarity:.4f}")
print(f"Company Description Similarity Score: {company_similarity:.4f}")

# Interpret the scores: >= 0.8 is reported as a strong match, >= 0.5 as a
# partial match, anything lower as a poor match.
if job_similarity >= 0.8:
    print("Your resume is a great match for the job description!")
elif job_similarity >= 0.5:
    print("Your resume is a good match for the job description, but could be improved.")
else:
    print("Your resume does not match the job description well.")

if company_similarity >= 0.8:
    print("Your resume aligns well with the company's values and culture!")
elif company_similarity >= 0.5:
    print("Your resume somewhat aligns with the company's values and culture.")
else:
    print("Your resume does not align well with the company's values and culture.")

Saving Ritisha_resume[1].pdf to Ritisha_resume[1].pdf
Job Description Similarity Score: 1.0000
Company Description Similarity Score: 1.0000
Your resume is a great match for the job description!
Your resume aligns well with the company's values and culture!
