In [2]:
!pip install pypdf
!pip install python-docx
import re
from pypdf import PdfReader
from docx import Document
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Job title -> skill keywords expected for that role. The insertion order
# here is the order in which the match scores are printed.
job_profiles = {
    'AI Engineer': [
        'python',
        'deep learning',
        'statistics',
        'machine learning',
    ],
    'Data Analyst': [
        'sql',
        'excel',
        'data visualization',
        'statistics',
    ],
    'Network Architect': [
        'network design',
        'cisco',
        'routing',
        'vpn',
    ],
    'Cybersecurity Analyst': [
        'network security',
        'ethical hacking',
        'firewalls',
        'penetration testing',
    ],
}

def _extract_pdf_text(file_path):
    """Return the text of every page of the PDF at *file_path*, one page per line block."""
    with open(file_path, 'rb') as file:
        reader = PdfReader(file)
        page_texts = []
        for page in reader.pages:
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    # Separate pages with a newline so the last word of one page is not
    # fused with the first word of the next one.
    return '\n'.join(page_texts)


def _extract_docx_text(file_path):
    """Return the paragraphs of the DOCX at *file_path*, each followed by a newline."""
    doc = Document(file_path)
    return ''.join(para.text + '\n' for para in doc.paragraphs)


def read_resume():
    """Prompt for a resume file until one can be read, and return its text.

    The user is asked for a file name repeatedly. PDF and DOCX files are
    accepted (the extension check ignores letter case). A missing file, a
    permission problem, an unsupported extension, a file without any
    non-whitespace text, or any other read error prints a message and
    prompts again.

    Returns:
        str: The extracted text of the resume.
    """
    while True:
        # Tolerate stray spaces around the typed/pasted file name.
        file_path = input("Enter resume file name (Only PDF or DOCX): ").strip()
        # Compare extensions case-insensitively so 'CV.PDF' is accepted too.
        lowered_path = file_path.lower()

        try:
            if lowered_path.endswith('.pdf'):
                text = _extract_pdf_text(file_path)
            elif lowered_path.endswith('.docx'):
                text = _extract_docx_text(file_path)
            else:
                print("Error: Invalid file, please enter a pdf or docx file.")
                continue

            # A document made only of blank lines/paragraphs is still empty.
            if not text.strip():
                print("Error: Empty file.")
                continue

            return text
        except FileNotFoundError:
            print("Error: File not found.")
        except PermissionError:
            print("Error: Permission denied.")
        except Exception as e:
            # Deliberate catch-all: a corrupt or unreadable document should
            # lead to another prompt rather than a crash.
            print(f"Unexpected error: {e}")

def extract_information(text):
    """Extract the candidate's name, e-mail addresses and skills from resume text.

    Args:
        text: Full text of the resume.

    Returns:
        On success, a tuple ``(name, email, skills)`` where ``name`` is the
        text following a line starting with "Name" (or ``'Not found'``),
        ``email`` is the list of all e-mail addresses found, and ``skills``
        is the list of recognised skill keywords in lowercase, in order of
        appearance (repeated mentions are kept).
        On failure, a string: ``"Error: ..."`` when no skill is found, or
        ``"Unexpected error: ..."`` for any other problem.
    """
    try:
        email = re.findall(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)

        # \b after "name" keeps lines such as "Names of tools" or
        # "Namespace" from being mistaken for the name field.
        name_match = re.search(
            r"^name\b[:\s]*([a-zA-Z .'-]+)",
            text,
            re.IGNORECASE | re.MULTILINE,
        )
        name = name_match.group(1).strip() if name_match else 'Not found'

        # Collapse every run of whitespace (including line breaks) to one
        # space so multi-word skills split across lines are still found.
        searchable_text = re.sub(r'\s+', ' ', text.lower())
        skills = re.findall(
            r'\b(python|tensorflow|machine learning|statistics|deep learning|sql|excel|data visualization|network design|cisco|routing|vpn|network security|ethical hacking|firewalls|penetration testing)\b',
            searchable_text
        )
        if not skills:
            return "Error: No skills found in the resume."
        return name, email, skills
    except Exception as e:
        # Callers expect an error string rather than an exception.
        return f"Unexpected error: {e}"

def match_skills(candidate_skills, job_profiles):
    """Score how well a candidate's skills match each job profile.

    For every job, the candidate's skills and the job's required skills are
    each joined into one text, vectorised together with TF-IDF, and compared
    by cosine similarity.

    Args:
        candidate_skills: Iterable of skill strings found in the resume.
        job_profiles: Mapping of job title to a list of required skills.

    Returns:
        On success, a dict mapping each job title to its match percentage
        rounded to two decimals. On failure, a string starting with
        ``"Error matching skills:"``.
    """
    try:
        scores = {}
        vectorizer = TfidfVectorizer(stop_words='english')
        # The candidate text does not depend on the job, so build it once.
        candidate_text = ' '.join(candidate_skills)
        for job, required_skills in job_profiles.items():
            required_text = ' '.join(required_skills)
            # With nothing to compare there is no overlap; score the job as 0
            # instead of letting the vectorizer fail on empty input and
            # discard the scores of every other job.
            if not candidate_text.strip() or not required_text.strip():
                scores[job] = 0.0
                continue
            skills_matrix = vectorizer.fit_transform([candidate_text, required_text])
            similarity_score = cosine_similarity(skills_matrix[0:1], skills_matrix[1:2])[0][0]
            scores[job] = round(similarity_score * 100, 2)
        return scores
    except Exception as e:
        # Callers expect an error string rather than an exception.
        return f"Error matching skills: {e}"

# Driver: read a resume, extract its details and print the job match scores.
text = read_resume()
result = extract_information(text)
# extract_information returns a tuple on success and a message string on any
# failure ("Error: ..." or "Unexpected error: ..."), so test the type rather
# than the message prefix.
if isinstance(result, str):
    print(result)
else:
    name, email, skills = result
    match_results = match_skills(skills, job_profiles)
    print(f"\nName: {name}")
    print(f"Email: {email[0] if email else 'Not found'}")
    # match_skills also reports failure as a string instead of a dict.
    if isinstance(match_results, str):
        print(match_results)
    else:
        print("\nJob Match Scores:")
        for job, score in match_results.items():
            print(f"{job}: {score}%")



Enter resume file name (Only PDF or DOCX): Resume2.pdf

Name: Ahmed Fawwaz Al-Qadi
Email: 2440034@uj.edu.sa

Job Match Scores:
AI Engineer: 38.0%
Data Analyst: 19.43%
Network Architect: 0.0%
Cybersecurity Analyst: 0.0%
