In [1]:
import spacy
import PyPDF2
import re

print("Welcome to AI-Powered Resume Analyzer!")

# Small English spaCy pipeline; supplies the named-entity recognizer used below.
nlp = spacy.load("en_core_web_sm")

# Lower-case skill phrases searched for in the resume text. Order matters:
# detected skills are reported in this order. Extend the list as needed.
SKILL_KEYWORDS = [
    # Programming and query languages
    "python", "java", "c++", "sql",
    # BI and big-data tooling
    "tableau", "power bi", "hadoop", "spark",
    # Analytics disciplines and office tools
    "machine learning", "data analysis", "nlp", "excel",
    # Cloud platforms
    "cloud", "aws", "azure",
]

# Extract text from PDF
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at *file_path*.

    Pages for which ``extract_text()`` yields nothing (``None`` or an empty
    string) are skipped. The remaining page texts are joined with a newline
    so the last word of one page cannot fuse with the first word of the next,
    which would otherwise corrupt entity recognition and keyword matching.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The combined page text, or ``""`` when no page yields any text.

    Raises:
        OSError: If the file cannot be opened. Errors raised by PyPDF2 while
            parsing propagate unchanged.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Extract while the file handle is still open; pages are read lazily.
        page_texts = [page.extract_text() for page in reader.pages]
    return "\n".join(content for content in page_texts if content)

# Skill matcher (simple keyword check)
def extract_skills(text, keywords=None):
    """Return the skills from *keywords* that occur in *text*.

    Matching is case-insensitive and boundary-aware: a keyword only counts
    when it is not embedded in a longer word, so "java" does not match
    "javascript" and "excel" does not match "excellent". A trailing version
    number is tolerated ("python3", "c++17"). The words of a multi-word
    keyword may be separated by any run of whitespace, because text extracted
    from a PDF can break a phrase across lines.

    Args:
        text: Resume text to search. ``None`` or ``""`` yields ``[]``.
        keywords: Iterable of skill phrases to look for. Defaults to the
            module-level ``SKILL_KEYWORDS``.

    Returns:
        A list of the matched keywords in title case (``str.title()``), in
        the order they appear in *keywords*.
    """
    if keywords is None:
        keywords = SKILL_KEYWORDS
    if not text:
        return []

    text_lower = text.lower()
    found = []
    for skill in keywords:
        words = skill.lower().split()
        if not words:
            continue
        # ``\b`` is unusable here: it never matches after "+" (as in "c++").
        # Instead require "no word character before" and "no letter after";
        # digits are allowed after so versioned mentions still match.
        pattern = (
            r"(?<!\w)"
            + r"\s+".join(re.escape(word) for word in words)
            + r"(?![^\W\d_])"
        )
        if re.search(pattern, text_lower):
            found.append(skill.title())
    return found

# Main analysis function
def analyze_resume(text):
    """Analyze resume text, print a report, and return the findings.

    Runs the spaCy pipeline ``nlp`` over *text*, checks for the presence of
    PERSON, DATE and ORG entities plus an e-mail address and a 10-digit phone
    number, detects skills via ``extract_skills``, and prints three sections
    (relevant entities, detected skills, feedback) to stdout.

    Args:
        text: The full resume text.

    Returns:
        A tuple ``(feedback, skills, entities)`` where *feedback* is a list of
        issue messages (empty when every check passes), *skills* is the list
        returned by ``extract_skills``, and *entities* is a list of
        ``(entity_text, entity_label)`` pairs for every entity spaCy found.
    """
    doc = nlp(text)
    # Entity spans can contain line breaks carried over from the source text;
    # collapse every whitespace run so each entity stays on a single line when
    # printed or exported (a plain strip() only trims the ends).
    entities = [(" ".join(ent.text.split()), ent.label_) for ent in doc.ents]
    labels_found = {label for _, label in entities}

    # Entity labels that must appear at least once, with the complaint to
    # raise when they do not.
    required_labels = (
        ("PERSON", "❌ Name not detected."),
        ("DATE", "❌ Work experience duration missing."),
        ("ORG", "❌ Company/Organization names not mentioned."),
    )
    # Regex patterns that must match somewhere in the raw text.
    required_patterns = (
        (r'\S+@\S+', "❌ Email ID not found."),
        (r'\b\d{5}[-\s]?\d{5}\b', "❌ Phone number not found (expected 10-digit)."),
    )

    feedback = [
        message for label, message in required_labels
        if label not in labels_found
    ]
    feedback.extend(
        message for pattern, message in required_patterns
        if not re.search(pattern, text)
    )

    # Only a subset of labels is shown; the full list is still returned.
    relevant_labels = {"PERSON", "ORG", "DATE", "GPE", "PRODUCT", "EVENT"}
    print("\n📄 Named Entities (Relevant):")
    for ent_text, ent_label in entities:
        if ent_label in relevant_labels:
            print(f" - {ent_text}: {ent_label}")

    print("\n🛠 Detected Skills:")
    skills = extract_skills(text)
    if skills:
        print(", ".join(skills))
    else:
        print("❌ No technical skills detected.")

    print("\n🧠 Resume Feedback:")
    if feedback:
        for issue in feedback:
            print(issue)
    else:
        print("✅ Your resume looks well-structured!")

    return feedback, skills, entities

# === Direct file path for your resume ===
pdf_path = r"E:\Drive E downloader\Arjun Overleaf.pdf"

# Run Analysis
# Reading the PDF is guarded separately from analysis/export so that a failure
# in the later stages is not misreported as a file-reading problem.
try:
    resume_text = extract_text_from_pdf(pdf_path)
except Exception as e:
    print(f"❌ Error reading file: {e}")
else:
    try:
        feedback, skills, entities = analyze_resume(resume_text)

        # Optional: Export feedback to a text file
        with open("Resume_Feedback.txt", "w", encoding='utf-8') as f:
            f.write("🧠 Resume Feedback\n\n")
            f.writelines(line + "\n" for line in feedback)
            f.write("\n🛠 Detected Skills\n\n")
            f.write(", ".join(skills) + "\n")
            f.write("\n📄 Named Entities\n\n")
            f.writelines(
                f"{ent_text} ({ent_label})\n" for ent_text, ent_label in entities
            )

        print("\n📝 Feedback saved to 'Resume_Feedback.txt'")
    except Exception as e:
        print(f"❌ Error analyzing resume or saving feedback: {e}")


Welcome to AI-Powered Resume Analyzer!

📄 Named Entities (Relevant):
 - DIXIT: ORG
 - 94172-74557: DATE
 - Final-year: ORG
 - AI: GPE
 - ML: ORG
 - SQL: ORG
 - Python: PERSON
 - Tableau: GPE
 - Lovely Professional University Phagwara: ORG
 - CSE: ORG
 - Doraha Public School Doraha: ORG
 - SQL: ORG
 - Tableau: GPE
 - Excel: PRODUCT
 - Power Bi
Platforms: Salesforce: PERSON
 - Hadoop: ORG
 - Hive
Soft Skills: PERSON
 - Team Player,: ORG
 - Holistic Thinking: ORG
 - Outlier: PERSON
 - • Designed: ORG
 - • Tech: ORG
 - C++: PERSON
 - ACADEMIC PROJECTS
FOOD: PERSON
 - • Built: ORG
 - ML: ORG
 - ETA: ORG
 - ggplot2: PERSON
 - Tableau: GPE
 - IPL: ORG
 - Excel: PRODUCT
 - • Delivered: ORG
 - the Naukri Campus Young Turk Competition: ORG
 - All India Competition: EVENT
 - Naukri: PERSON
 - Cloud Computing: PERSON
 - NPTEL: ORG
 - Coursera: PERSON

🛠 Detected Skills:
Python, C++, Sql, Tableau, Power Bi, Hadoop, Data Analysis, Excel, Cloud

🧠 Resume Feedback:
✅ Your resume looks well-structured!