<a href="https://colab.research.google.com/github/Diksha-mor21/Resume-Analyzer/blob/main/Resume_Analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary packages
!pip install docx2txt pdfminer.six scikit-learn nltk fuzzywuzzy

# Import required libraries
import nltk
import re
import docx2txt
from pdfminer.high_level import extract_text
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from IPython.display import display, clear_output
import ipywidgets as widgets
import json
import os

# Fetch the NLTK data packages this notebook asks for (stop-word lists and
# the Punkt tokenizer models).
# NOTE(review): nothing visible in this cell reads these resources - confirm
# whether they are still needed.
for nltk_resource in ('stopwords', 'punkt'):
    nltk.download(nltk_resource)

# Reference skills that resume words are fuzzy-matched against.
# The order is kept stable because it is part of the matching input.
DESIRED_SKILLS = [
    "Python", "Data Analysis", "Machine Learning",
    "Tableau", "PowerBI", "SQL",
    "Excel", "Communication", "Statistics",
    "Java", "C++", "AWS",
    "Cloud Computing", "Deep Learning", "Big Data",
    "Kubernetes", "Docker", "Hadoop",
    "Spark",
]

# Utility Functions
def extract_text_from_file(file_path):
    """Extract the plain text of a PDF or DOCX file.

    The extension check is case-insensitive, so ``resume.PDF`` and
    ``resume.Docx`` are handled like their lowercase counterparts.

    Args:
        file_path: Path to the file; its extension selects the extractor.

    Returns:
        The extracted text on success. On any failure (unsupported
        extension, or an exception raised while extracting) a string of
        the form ``"Error extracting text: <reason>"`` is returned instead
        of raising.
    """
    try:
        # Uploaded files often carry upper- or mixed-case extensions.
        lowered_path = file_path.lower()
        if lowered_path.endswith('.pdf'):
            return extract_text(file_path)
        if lowered_path.endswith('.docx'):
            return docx2txt.process(file_path)
        raise ValueError("Unsupported file format. Please upload a .pdf or .docx file.")
    except Exception as e:
        # Deliberately broad: failures are reported as text, and the caller
        # in this file recognises them by the leading "Error" prefix.
        return f"Error extracting text: {str(e)}"

def find_section(text, keyword):
    """Return the text that follows a section heading in the resume.

    The heading is matched case-insensitively anywhere in ``text``. Any
    whitespace or colons directly after it are skipped, and the section is
    taken to run up to the next blank line or the end of the text.

    Args:
        text: Full resume text to search.
        keyword: Literal heading to look for (e.g. ``"Education"``). It is
            escaped before being placed in the pattern, so characters such
            as ``+`` or ``(`` match literally.

    Returns:
        The stripped section body, or ``"Section not found."`` when the
        pattern does not match.
    """
    # re.escape keeps headings like "C++" or "Skills (core)" from being
    # interpreted as regex syntax.
    pattern = rf'{re.escape(keyword)}[\s:]*([\s\S]+?)(?:\n\n|$)'
    match = re.search(pattern, text, re.IGNORECASE)
    return match.group(1).strip() if match else "Section not found."

def extract_experience(text):
    """Sum every "<number> year(s)" mention found in the text.

    Recognised forms include ``"3 years"``, ``"1 year"``, ``"5+ years"``
    and ``"10years"`` (case-insensitive). For a decimal such as
    ``"2.5 years"`` only the whole-number part (2) is counted. The word
    must end after ``year``/``years``, so ``"5 yearly"`` is ignored.

    The result is a plain sum of all mentions; no attempt is made to tell
    overlapping periods or unrelated durations apart.

    Args:
        text: Resume text to scan.

    Returns:
        The integer total of all matched year counts, or 0 when there are
        none.
    """
    # (\d+)        whole years (captured)
    # (?:\.\d+)?   optional fractional part, consumed so it is not re-read
    # \+?          optional "+" as in "5+ years"
    # \s*          whitespace is optional ("10years")
    pattern = r'(\d+)(?:\.\d+)?\+?\s*years?\b'
    return sum(int(count) for count in re.findall(pattern, text, re.IGNORECASE))

def extract_all_skills_from_text(resume_text):
    """Fuzzy-match the words of a resume against ``DESIRED_SKILLS``.

    The text is split on whitespace, words of two characters or fewer are
    dropped, and each remaining distinct word is compared with every entry
    of ``DESIRED_SKILLS`` using ``fuzz.partial_ratio``. The best-scoring
    skill for a word is kept when its score is at least 70.

    Args:
        resume_text: Full resume text.

    Returns:
        A list of the matched skills without duplicates, in the order they
        appear in ``DESIRED_SKILLS`` (so repeated runs give the same
        output).
    """
    # Score each distinct word once; resumes repeat words heavily and the
    # fuzzy comparison is the expensive step. dict.fromkeys de-duplicates
    # while keeping first-seen order.
    candidate_words = dict.fromkeys(
        word for word in resume_text.split() if len(word) > 2
    )

    found = set()
    for word in candidate_words:
        match = process.extractOne(word, DESIRED_SKILLS, scorer=fuzz.partial_ratio)
        if match and match[1] >= 70:  # 70% similarity threshold
            found.add(match[0])

    # Emit in reference-list order rather than arbitrary set order.
    return [skill for skill in DESIRED_SKILLS if skill in found]

def calculate_resume_score(matched_skills, experience_years):
    """Score a resume from its matched skills and years of experience.

    Every matched skill is worth 10 points and every year of experience is
    worth 5 points; the overall score is the sum of the two.

    Args:
        matched_skills: Collection of skills found in the resume.
        experience_years: Number of years of experience.

    Returns:
        A dict with the keys ``skill_score``, ``experience_score`` and
        ``overall_score``.
    """
    points_from_skills = 10 * len(matched_skills)
    points_from_experience = 5 * experience_years
    return {
        'skill_score': points_from_skills,
        'experience_score': points_from_experience,
        'overall_score': points_from_skills + points_from_experience,
    }

def calculate_skills_match_percentage(matched_skills, desired_skills):
    """Return the share of desired skills present in the matched skills.

    Args:
        matched_skills: Skills found in the resume.
        desired_skills: Reference skills to compare against.

    Returns:
        The percentage rounded to two decimals, or 0 when
        ``desired_skills`` is empty.
    """
    total_desired = len(desired_skills)
    if not total_desired:
        # Nothing to compare against; also avoids dividing by zero.
        return 0

    overlap = set(matched_skills) & set(desired_skills)
    fraction = len(overlap) / total_desired
    return round(fraction * 100, 2)

def analyze_resume(file_path):
    """Analyze a resume file for education, skills, and experience.

    Args:
        file_path: Path to a ``.pdf`` or ``.docx`` resume.

    Returns:
        On success, a dict with the keys ``education`` (text of the
        "Education" section), ``matched_skills`` (list of skills),
        ``experience_years`` (summed year mentions), ``scores`` (dict from
        ``calculate_resume_score``) and ``skills_match_percentage``.
        When text extraction fails, the extractor's error message string is
        returned unchanged.
    """
    resume_text = extract_text_from_file(file_path)
    # Test for the exact prefix the extractor puts on failures; checking
    # only for "Error" would misclassify a resume whose own text happens to
    # begin with that word.
    if resume_text.startswith("Error extracting text:"):
        return resume_text

    # Extract key sections
    education_section = find_section(resume_text, "Education")

    # Extract all skills from the entire resume text
    matched_skills = extract_all_skills_from_text(resume_text)

    # Extract years of experience
    years_of_experience = extract_experience(resume_text)

    # Calculate scores
    scores = calculate_resume_score(matched_skills, years_of_experience)

    # Calculate skills match percentage
    skills_match_percentage = calculate_skills_match_percentage(matched_skills, DESIRED_SKILLS)

    # Return structured analysis
    return {
        "education": education_section,
        "matched_skills": matched_skills,
        "experience_years": years_of_experience,
        "scores": scores,
        "skills_match_percentage": skills_match_percentage,
    }

# Interactive widgets: an output area that collects everything printed
# during analysis, and a single-file upload button limited to PDF and DOCX.
output = widgets.Output()
upload_button = widgets.FileUpload(multiple=False, accept='.pdf, .docx')

def _iter_uploaded_files(upload_value):
    """Yield ``(filename, content)`` pairs from a ``FileUpload`` value.

    Handles both layouts: a dict mapping filename to an info dict
    (ipywidgets 7.x) and a sequence of info dicts that carry their own
    ``name`` key (ipywidgets 8.x).
    """
    if isinstance(upload_value, dict):
        for filename, file_info in upload_value.items():
            yield filename, file_info['content']
    else:
        for file_info in upload_value:
            yield file_info['name'], file_info['content']


def on_upload_change(change):
    """Analyze each uploaded resume and show/save the result.

    Observer callback for the upload button. For every uploaded file it
    writes the content to the current directory, runs ``analyze_resume`` on
    it, prints the result inside the ``output`` widget, and on success also
    saves it as ``<name>_analysis.json``.

    Args:
        change: Change notification passed by the widget; the current value
            is read from ``upload_button`` directly.
    """
    with output:
        output.clear_output()
        for filename, content in _iter_uploaded_files(upload_button.value):
            # The name comes from the client; keep only its final component
            # so the file is always written inside the working directory.
            safe_name = os.path.basename(filename)
            file_path = f"./{safe_name}"
            # Save the uploaded file locally
            with open(file_path, 'wb') as f:
                f.write(content)

            print(f"Analyzing file: {filename}...")
            analysis_result = analyze_resume(file_path)

            if isinstance(analysis_result, str):  # Error message
                print(analysis_result)
                continue

            # Pretty display and export to JSON
            print("\n--- Resume Analysis Result ---")
            print(json.dumps(analysis_result, indent=4))

            # Save to JSON file
            json_file = f"{os.path.splitext(safe_name)[0]}_analysis.json"
            with open(json_file, "w", encoding="utf-8") as json_out:
                json.dump(analysis_result, json_out, indent=4)
            print(f"Results saved to {json_file}")

# Display upload button and output
# Call on_upload_change whenever the button's `value` trait changes.
upload_button.observe(on_upload_change, names='value')
# Render the button together with the output area that the callback prints into.
display(upload_button, output)