<a href="https://colab.research.google.com/github/VISHAL-1-3/Resume/blob/main/resume(z).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
import re
import PyPDF2
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# --- Skill databases by domain and category ---
DOMAIN_SKILLS = {
    # Skills looked for when the job is classified as a technology role.
    'technology': {
        'technical': [
            'python',
            'java',
            'c++',
            'javascript',
            'sql',
            'tensorflow',
            'pytorch',
            'machine learning',
            'scikit-learn',
        ],
        'soft': [
            'communication',
            'teamwork',
            'problem solving',
            'leadership',
        ],
        # No domain-specific skills are defined for technology roles.
        'domain': [],
    },
    # Skills looked for when the job is classified as a teaching role.
    'teaching': {
        # No technical skills are defined for teaching roles.
        'technical': [],
        'soft': [
            'communication',
            'leadership',
            'teamwork',
            'patience',
            'empathy',
        ],
        'domain': [
            'lesson planning',
            'curriculum development',
            'classroom management',
            'student assessment',
        ],
    },
}

# --- Extract text from resume PDF ---
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page of the PDF at ``file_path``.

    Pages whose ``extract_text()`` result is falsy (empty string or ``None``)
    are skipped. Each kept page's text is followed by a single space, so the
    result ends with a trailing space whenever at least one page had text.
    """
    with open(file_path, "rb") as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # Extraction has to happen while the file is still open.
        extracted = (page.extract_text() for page in reader.pages)
        return "".join(chunk + " " for chunk in extracted if chunk)

# --- Extract skills present in text ---
def extract_skills(text, skill_list):
    """Return the skills from ``skill_list`` that occur in ``text``.

    Matching is case-insensitive and respects word boundaries, so "java" is
    not reported for a text that only contains "javascript". Skills that start
    or end with a non-word character (e.g. "c++") are matched as well.

    Args:
        text: Free text to scan (resume or job description).
        skill_list: Iterable of skill strings to look for. Empty strings are
            ignored.

    Returns:
        A list of the matching skills, spelled and ordered as in
        ``skill_list``.
    """
    found = []
    for skill in skill_list:
        if not skill:
            # An empty pattern would "match" any text; nothing to look for.
            continue
        # ``\b`` only marks a transition between a word and a non-word
        # character.  Placing it after "c++" would demand a word character
        # right after the "+", so "C++ developer" or "C++," would never match.
        # Anchor only the sides of the skill that are word characters.
        leading = r'\b' if re.match(r'\w', skill[0]) else ''
        trailing = r'\b' if re.match(r'\w', skill[-1]) else ''
        pattern = leading + re.escape(skill) + trailing
        if re.search(pattern, text, flags=re.IGNORECASE):
            found.append(skill)
    return found

# --- Detect job domain from job description text ---
def detect_job_domain(text):
    """Classify a job description as 'technology' or 'teaching'.

    The lower-cased text is checked for keyword substrings. Technology
    keywords are tested first, so a text containing keywords of both kinds is
    classified as 'technology'. When no keyword is present the function also
    falls back to 'technology'.
    """
    lowered = text.lower()
    # Ordered: earlier entries win when several domains have a keyword hit.
    keyword_table = (
        ('technology', ('software', 'developer', 'python', 'machine learning')),
        ('teaching', ('lesson', 'curriculum', 'student', 'classroom')),
    )
    for label, keywords in keyword_table:
        for keyword in keywords:
            if keyword in lowered:
                return label
    # No keyword matched: default domain.
    return 'technology'

# --- Partial cosine similarity skill match ---
def partial_skill_match_score(resume_skills, job_skills):
    """Score how closely the resume's skills match the job's skills.

    Every skill string is turned into a TF-IDF vector (vocabulary fitted on
    both lists together). For each resume skill the highest cosine similarity
    to any job skill is taken; the mean of those maxima is scaled to a
    percentage.

    Args:
        resume_skills: List of skill strings found in the resume.
        job_skills: List of skill strings found in the job description.

    Returns:
        The score as a float percentage (0 means no overlap, 100 means every
        resume skill has an identical job skill). Returns 0.0 when either list
        is empty or when no skill yields a usable token.
    """
    if not resume_skills or not job_skills:
        return 0.0
    # The vectorizer's default token pattern only keeps tokens of two or more
    # word characters, which reduces "c++" to nothing: its vector is all
    # zeros (so an exact match scores 0) and a vocabulary made only of such
    # skills cannot be fitted at all.  Keep single-character words and a
    # trailing run of "+"/"#" so "c++" and "c#" remain tokens.
    vectorizer = TfidfVectorizer(token_pattern=r'(?u)\b\w+[+#]*')
    try:
        vectorizer.fit(resume_skills + job_skills)
    except ValueError:
        # Raised for an empty vocabulary (e.g. skills made only of symbols).
        return 0.0
    resume_vecs = vectorizer.transform(resume_skills)
    job_vecs = vectorizer.transform(job_skills)
    # Rows are resume skills, columns are job skills.
    sim_matrix = cosine_similarity(resume_vecs, job_vecs)
    best_match_per_resume_skill = sim_matrix.max(axis=1)
    return float(np.mean(best_match_per_resume_skill) * 100)

# --- Weighted ATS score calculation ---
def calculate_weighted_ats_score(resume_skills_dict, job_skills_dict, domain):
    """Combine per-category skill match scores into one weighted ATS score.

    Args:
        resume_skills_dict: Mapping of category ('technical', 'soft',
            'domain') to the list of skills found in the resume. Missing
            categories are treated as empty.
        job_skills_dict: Same mapping for the job description.
        domain: Job domain name; selects the category weights. Unknown
            domains use a generic 0.5 / 0.3 / 0.2 weighting.

    Returns:
        The weighted score rounded to two decimals, or 0.0 when the job
        description lists no skills in any category.
    """
    domain_weights = {
        'technology': {'technical': 0.7, 'soft': 0.15, 'domain': 0.15},
        'teaching': {'technical': 0.1, 'soft': 0.5, 'domain': 0.4},
    }
    weights = domain_weights.get(domain, {'technical': 0.5, 'soft': 0.3, 'domain': 0.2})

    # A category in which the job asks for nothing cannot be matched and
    # always scores 0.  Leaving its weight in place would cap the result below
    # 100 even for a perfect resume, so only the categories the job actually
    # requires take part and their weights are renormalised.
    required = [category for category in weights if job_skills_dict.get(category)]
    total_weight = sum(weights[category] for category in required)
    if not total_weight:
        return 0.0

    weighted_sum = sum(
        partial_skill_match_score(resume_skills_dict.get(category, []),
                                  job_skills_dict[category]) * weights[category]
        for category in required
    )
    return round(weighted_sum / total_weight, 2)

# --- Main Execution ---

# Path of the uploaded resume PDF; change it to point at your own file.
resume_pdf_path = '/content/Vishal_Gupta_22126061.pdf'
resume_text = extract_text_from_pdf(resume_pdf_path)

# The job description is pasted by the user as plain text.
job_text = input("Paste job description text here:\n")

# The detected domain decides which skill lists are searched for.
domain = detect_job_domain(job_text)
print(f"Detected domain: {domain}")

# Look up the same per-category skill lists in both documents.
skill_catalog = DOMAIN_SKILLS[domain]
skill_categories = ('technical', 'soft', 'domain')

resume_skills = {
    category: extract_skills(resume_text, skill_catalog[category])
    for category in skill_categories
}

job_skills = {
    category: extract_skills(job_text, skill_catalog[category])
    for category in skill_categories
}

print("Resume Skills:", resume_skills)
print("Job Skills:", job_skills)

# Combine the per-category matches into the final weighted score.
ats_score = calculate_weighted_ats_score(resume_skills, job_skills, domain)
print(f"Weighted ATS similarity score: {ats_score}%")


Paste job description text here:
At [Company X], we’re proud to stand at the forefront of the Big Data revolution. Using the latest analytics tools and processes, we’re able to maximize our offerings and deliver unparalleled service and support. To help carry us even further, we’re searching for an experienced data analyst to join our team. The ideal candidate will be highly skilled in all aspects of data analytics, including mining, generation, and visualization. Additionally, this person should be committed to transforming data into readable, goal-oriented reports that drive innovation and growth.  Objectives of this role Develop, implement, and maintain leading-edge analytics systems, taking complicated problems and building simple frameworks Identify trends and opportunities for growth through analysis of complex datasets Evaluate organizational methods and provide source-to-target mappings and information-model specification documents for datasets Create best-practice reports base