### Build a Job Description and Resume Matching System

In [None]:
!pip install docx2txt
!pip install PyPDF2
import os
import docx2txt
import PyPDF2
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity



In [None]:
def extract_text_from_pdf(file_path):
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The text of all pages concatenated in page order. A page for which
        ``extract_text()`` yields nothing contributes an empty string.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # `or ""` keeps a page that yields None/empty text from breaking the
        # concatenation; a single join avoids repeated `+=` string copies.
        # The join runs inside the `with` so pages are read while the file
        # is still open.
        return "".join(page.extract_text() or "" for page in reader.pages)

In [None]:
def extract_text_from_docx(file_path):
    """Return the text content of the DOCX file at ``file_path``.

    The work is delegated entirely to ``docx2txt.process``.
    """
    document_text = docx2txt.process(file_path)
    return document_text

In [None]:
def extract_text_from_txt(file_path):
    """Read a plain-text file and return its contents as a string.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The decoded file contents. A leading UTF-8 byte-order mark is
        stripped, and bytes that are not valid UTF-8 are replaced with
        U+FFFD instead of raising ``UnicodeDecodeError``.
    """
    # 'utf-8-sig' decodes plain UTF-8 identically but drops a leading BOM;
    # errors='replace' keeps one badly encoded resume from aborting the
    # whole matching run.
    with open(file_path, 'r', encoding='utf-8-sig', errors='replace') as file:
        return file.read()

In [None]:
def extract_text(file_path):
    """Extract text from a file, choosing the extractor by file extension.

    The extension check is case-insensitive, so ``resume.PDF`` is handled
    the same way as ``resume.pdf``.

    Args:
        file_path: Path of the file to read. Supported extensions are
            ``.pdf``, ``.docx`` and ``.txt``.

    Returns:
        The extracted text, or an empty string (after printing a notice)
        when the extension is not supported.
    """
    # Only the comparison uses the lower-cased copy; the original path is
    # what gets passed on and opened.
    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        return extract_text_from_pdf(file_path)
    if lowered.endswith('.docx'):
        return extract_text_from_docx(file_path)
    if lowered.endswith('.txt'):
        return extract_text_from_txt(file_path)

    print(f"Unsupported file type for {file_path}. Skipping.")
    return ""

In [None]:
def match_resumes(job_description, resume_files, top_n=5):
    """Rank resumes by TF-IDF cosine similarity to a job description.

    Args:
        job_description: Text of the job description.
        resume_files: Paths of the resume files; each is read with
            ``extract_text``.
        top_n: Maximum number of matches to return (default 5). ``None``
            returns every resume.

    Returns:
        A list of ``(file_path, score)`` tuples ordered from highest to
        lowest similarity, with ``score`` a plain ``float`` rounded to two
        decimals. The list is empty when ``resume_files`` is empty or when
        no usable vocabulary could be built from the texts.
    """
    resumes = [extract_text(resume) for resume in resume_files]

    if not resumes:
        print("No valid resumes were processed.")
        return []

    # Row 0 is the job description; rows 1..n are the resumes.
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([job_description] + resumes)
    except ValueError:
        # Raised by scikit-learn when the documents yield an empty
        # vocabulary (e.g. all texts are empty or contain only stop words).
        print("No usable text found in the job description or resumes.")
        return []

    # cosine_similarity accepts sparse input, so the matrix is not
    # densified. The 0:1 slice keeps the job row two-dimensional.
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]

    # Highest similarity first, truncated to the requested number of matches.
    ranked_indices = similarities.argsort()[::-1][:top_n]
    # float() turns the numpy scalar into a builtin float for clean output.
    return [
        (resume_files[i], round(float(similarities[i]), 2))
        for i in ranked_indices
    ]

In [None]:
def main():
    """Prompt for a job description and print the best-matching resumes.

    Resume paths are hardcoded below. Paths that do not exist on disk are
    reported and skipped. A blank job description ends the run early,
    because it would give every resume the same zero score.
    """
    # Hardcoded paths to resume files
    resume_files = [
        "/content/Data Scientist.pdf",
        "/content/Finantial Analist.pdf",
        "/content/Healthcare.txt",
        "/content/Software engineer.pdf",
        "/content/Teacher.pdf",
        "/content/advocate.txt",
        "/content/backend developer.pdf",
        "/content/banking.txt",
        "/content/data engineer.pdf",
        "/content/designer.pdf",
        "/content/dev oops engineer.pdf",
        "/content/Engineer.pdf",
        "/content/marketing specialist.pdf",
        "/content/project manager.pdf",
        "/content/software developer.pdf",
    ]

    # Split the list into files that exist and files that do not, so the
    # user can see exactly which resumes are being skipped.
    valid_files = []
    missing_files = []
    for file in resume_files:
        if os.path.exists(file):
            valid_files.append(file)
        else:
            missing_files.append(file)
    if missing_files:
        print("Some files were not found and will be skipped.")
        for file in missing_files:
            print(f"  - {file}")

    # Input job description
    job_description = input("Enter the job description: ").strip()
    if not job_description:
        print("No job description provided.")
        return

    # Match resumes
    ranked_resumes = match_resumes(job_description, valid_files)
    if not ranked_resumes:
        print("No matching resumes to display.")
        return

    # Display results; the header reflects how many matches were returned.
    print(f"\nTop {len(ranked_resumes)} Matching Resumes:")
    for rank, (filename, score) in enumerate(ranked_resumes, start=1):
        print(f"{rank}. {filename} - Similarity Score: {score}")

In [None]:
# Entry point: call main() only when this module is executed directly
# (its __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

Enter the job description: Job Description: We are looking for a highly skilled Software Engineer to join our development team. The ideal candidate will have experience in Python, Java, or C++, along with a strong understanding of software development life cycles. You will collaborate with cross-functional teams to design, develop, and maintain scalable software solutions.  Key Responsibilities:  Develop high-quality software that meets technical and functional requirements. Write clean, maintainable, and efficient code. Troubleshoot, debug, and resolve software issues. Collaborate with team members on architectural designs and technical decisions. Skills Required:  Proficiency in programming languages such as Python, Java, or C++. Experience with frameworks like Django, React, or Spring Boot. Knowledge of databases (SQL, NoSQL). Familiarity with cloud platforms like AWS or Azure.

Top 5 Matching Resumes:
1. /content/Software engineer.pdf - Similarity Score: 0.29
2. /content/Engineer.p